Merge branch '16535-s3'
[arvados.git] / services / keep-balance / balance_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "crypto/md5"
9         "fmt"
10         "sort"
11         "strconv"
12         "testing"
13         "time"
14
15         "git.arvados.org/arvados.git/sdk/go/arvados"
16         "git.arvados.org/arvados.git/sdk/go/ctxlog"
17         check "gopkg.in/check.v1"
18 )
19
20 // Test with Gocheck
21 func Test(t *testing.T) {
22         check.TestingT(t)
23 }
24
25 var _ = check.Suite(&balancerSuite{})
26
27 type balancerSuite struct {
28         Balancer
29         srvs            []*KeepService
30         blks            map[string]tester
31         knownRendezvous [][]int
32         signatureTTL    int64
33 }
34
35 const (
36         // index into knownRendezvous
37         known0 = 0
38 )
39
40 type slots []int
41
42 type tester struct {
43         known       int
44         desired     map[string]int
45         current     slots
46         timestamps  []int64
47         shouldPull  slots
48         shouldTrash slots
49
50         shouldPullMounts  []string
51         shouldTrashMounts []string
52
53         expectBlockState *balancedBlockState
54         expectClassState map[string]balancedBlockState
55 }
56
57 func (bal *balancerSuite) SetUpSuite(c *check.C) {
58         bal.knownRendezvous = nil
59         for _, str := range []string{
60                 "3eab2d5fc9681074",
61                 "097dba52e648f1c3",
62                 "c5b4e023f8a7d691",
63                 "9d81c02e76a3bf54",
64         } {
65                 var slots []int
66                 for _, c := range []byte(str) {
67                         pos, _ := strconv.ParseUint(string(c), 16, 4)
68                         slots = append(slots, int(pos))
69                 }
70                 bal.knownRendezvous = append(bal.knownRendezvous, slots)
71         }
72
73         bal.signatureTTL = 3600
74         bal.Logger = ctxlog.TestLogger(c)
75 }
76
77 func (bal *balancerSuite) SetUpTest(c *check.C) {
78         bal.srvs = make([]*KeepService, 16)
79         bal.KeepServices = make(map[string]*KeepService)
80         for i := range bal.srvs {
81                 srv := &KeepService{
82                         KeepService: arvados.KeepService{
83                                 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
84                         },
85                 }
86                 srv.mounts = []*KeepMount{{
87                         KeepMount: arvados.KeepMount{
88                                 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
89                         },
90                         KeepService: srv,
91                 }}
92                 bal.srvs[i] = srv
93                 bal.KeepServices[srv.UUID] = srv
94         }
95
96         bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
97         bal.cleanupMounts()
98 }
99
100 func (bal *balancerSuite) TestPerfect(c *check.C) {
101         bal.try(c, tester{
102                 desired:     map[string]int{"default": 2},
103                 current:     slots{0, 1},
104                 shouldPull:  nil,
105                 shouldTrash: nil,
106                 expectBlockState: &balancedBlockState{
107                         needed: 2,
108                 }})
109 }
110
111 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
112         bal.try(c, tester{
113                 desired:     map[string]int{"default": 2},
114                 current:     slots{0, 2, 1},
115                 shouldTrash: slots{2},
116                 expectBlockState: &balancedBlockState{
117                         needed:   2,
118                         unneeded: 1,
119                 }})
120 }
121
122 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
123         bal.try(c, tester{
124                 desired:     map[string]int{"default": 0},
125                 current:     slots{0, 1, 3},
126                 shouldTrash: slots{0, 1, 3},
127                 expectBlockState: &balancedBlockState{
128                         unneeded: 3,
129                 }})
130 }
131
132 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
133         bal.try(c, tester{
134                 desired:    map[string]int{"default": 4},
135                 current:    slots{0, 1},
136                 shouldPull: slots{2, 3},
137                 expectBlockState: &balancedBlockState{
138                         needed:  2,
139                         pulling: 2,
140                 }})
141 }
142
143 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
144         bal.srvList(0, slots{3})[0].ReadOnly = true
145         bal.try(c, tester{
146                 desired:    map[string]int{"default": 4},
147                 current:    slots{0, 1},
148                 shouldPull: slots{2, 4},
149                 expectBlockState: &balancedBlockState{
150                         needed:  2,
151                         pulling: 2,
152                 }})
153 }
154
155 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
156         bal.testMultipleViews(c, true)
157 }
158
159 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
160         bal.testMultipleViews(c, false)
161 }
162
163 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
164         for i, srv := range bal.srvs {
165                 // Add a mount to each service
166                 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
167                 srv.mounts = append(srv.mounts, &KeepMount{
168                         KeepMount: arvados.KeepMount{
169                                 DeviceID:    fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
170                                 UUID:        fmt.Sprintf("zzzzz-mount-%015x", i<<16),
171                                 ReadOnly:    readonly,
172                                 Replication: 1,
173                         },
174                         KeepService: srv,
175                 })
176         }
177         for i := 1; i < len(bal.srvs); i++ {
178                 c.Logf("i=%d", i)
179                 if i == 4 {
180                         // Timestamps are all different, but one of
181                         // the mounts on srv[4] has the same device ID
182                         // where the non-deletable replica is stored
183                         // on srv[3], so only one replica is safe to
184                         // trash.
185                         bal.try(c, tester{
186                                 desired:     map[string]int{"default": 1},
187                                 current:     slots{0, i, i},
188                                 shouldTrash: slots{i}})
189                 } else if readonly {
190                         // Timestamps are all different, and the third
191                         // replica can't be trashed because it's on a
192                         // read-only mount, so the first two replicas
193                         // should be trashed.
194                         bal.try(c, tester{
195                                 desired:     map[string]int{"default": 1},
196                                 current:     slots{0, i, i},
197                                 shouldTrash: slots{0, i}})
198                 } else {
199                         // Timestamps are all different, so both
200                         // replicas on the non-optimal server should
201                         // be trashed.
202                         bal.try(c, tester{
203                                 desired:     map[string]int{"default": 1},
204                                 current:     slots{0, i, i},
205                                 shouldTrash: slots{i, i}})
206                 }
207                 // If the three replicas have identical timestamps,
208                 // none of them can be trashed safely.
209                 bal.try(c, tester{
210                         desired:    map[string]int{"default": 1},
211                         current:    slots{0, i, i},
212                         timestamps: []int64{12345678, 12345678, 12345678}})
213                 // If the first and third replicas have identical
214                 // timestamps, only the second replica should be
215                 // trashed.
216                 bal.try(c, tester{
217                         desired:     map[string]int{"default": 1},
218                         current:     slots{0, i, i},
219                         timestamps:  []int64{12345678, 12345679, 12345678},
220                         shouldTrash: slots{i}})
221         }
222 }
223
224 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
225         bal.try(c, tester{
226                 desired:    map[string]int{"default": 2},
227                 current:    slots{2, 0},
228                 shouldPull: slots{1}})
229         bal.try(c, tester{
230                 desired:    map[string]int{"default": 2},
231                 current:    slots{2, 7},
232                 shouldPull: slots{0, 1}})
233         // if only one of the pulls succeeds, we'll see this next:
234         bal.try(c, tester{
235                 desired:     map[string]int{"default": 2},
236                 current:     slots{2, 1, 7},
237                 shouldPull:  slots{0},
238                 shouldTrash: slots{7}})
239         // if both pulls succeed, we'll see this next:
240         bal.try(c, tester{
241                 desired:     map[string]int{"default": 2},
242                 current:     slots{2, 0, 1, 7},
243                 shouldTrash: slots{2, 7}})
244
245         // unbalanced + excessive replication => pull + trash
246         bal.try(c, tester{
247                 desired:     map[string]int{"default": 2},
248                 current:     slots{2, 5, 7},
249                 shouldPull:  slots{0, 1},
250                 shouldTrash: slots{7}})
251 }
252
253 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
254         for s, srv := range bal.srvs {
255                 for i := 0; i < 3; i++ {
256                         m := *(srv.mounts[0])
257                         m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
258                         srv.mounts = append(srv.mounts, &m)
259                 }
260         }
261         bal.try(c, tester{
262                 desired:    map[string]int{"default": 2},
263                 current:    slots{0, 0},
264                 shouldPull: slots{1}})
265         bal.try(c, tester{
266                 desired:    map[string]int{"default": 2},
267                 current:    slots{2, 2},
268                 shouldPull: slots{0, 1}})
269         bal.try(c, tester{
270                 desired:     map[string]int{"default": 2},
271                 current:     slots{0, 0, 1},
272                 shouldTrash: slots{0}})
273         bal.try(c, tester{
274                 desired:     map[string]int{"default": 2},
275                 current:     slots{1, 1, 0},
276                 shouldTrash: slots{1}})
277         bal.try(c, tester{
278                 desired:     map[string]int{"default": 2},
279                 current:     slots{1, 0, 1, 0, 2},
280                 shouldTrash: slots{0, 1, 2}})
281         bal.try(c, tester{
282                 desired:     map[string]int{"default": 2},
283                 current:     slots{1, 1, 1, 0, 2},
284                 shouldTrash: slots{1, 1, 2}})
285         bal.try(c, tester{
286                 desired:     map[string]int{"default": 2},
287                 current:     slots{1, 1, 2},
288                 shouldPull:  slots{0},
289                 shouldTrash: slots{1}})
290         bal.try(c, tester{
291                 desired:     map[string]int{"default": 2},
292                 current:     slots{1, 1, 0},
293                 timestamps:  []int64{12345678, 12345678, 12345679},
294                 shouldTrash: nil})
295         bal.try(c, tester{
296                 desired:    map[string]int{"default": 2},
297                 current:    slots{1, 1},
298                 shouldPull: slots{0}})
299 }
300
301 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
302         // For purposes of increasing replication, we assume identical
303         // replicas are distinct.
304         bal.try(c, tester{
305                 desired:    map[string]int{"default": 4},
306                 current:    slots{0, 1},
307                 timestamps: []int64{12345678, 12345678},
308                 shouldPull: slots{2, 3}})
309 }
310
311 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
312         // For purposes of decreasing replication, we assume identical
313         // replicas are NOT distinct.
314         bal.try(c, tester{
315                 desired:    map[string]int{"default": 2},
316                 current:    slots{0, 1, 2},
317                 timestamps: []int64{12345678, 12345678, 12345678}})
318         bal.try(c, tester{
319                 desired:    map[string]int{"default": 2},
320                 current:    slots{0, 1, 2},
321                 timestamps: []int64{12345678, 10000000, 10000000}})
322 }
323
324 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
325         oldTime := bal.MinMtime - 3600
326         newTime := bal.MinMtime + 3600
327         // The excess replica is too new to delete.
328         bal.try(c, tester{
329                 desired:    map[string]int{"default": 2},
330                 current:    slots{0, 1, 2},
331                 timestamps: []int64{oldTime, newTime, newTime + 1},
332                 expectBlockState: &balancedBlockState{
333                         needed:   2,
334                         unneeded: 1,
335                 }})
336         // The best replicas are too new to delete, but the excess
337         // replica is old enough.
338         bal.try(c, tester{
339                 desired:     map[string]int{"default": 2},
340                 current:     slots{0, 1, 2},
341                 timestamps:  []int64{newTime, newTime + 1, oldTime},
342                 shouldTrash: slots{2}})
343 }
344
345 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
346         bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
347         bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
348         bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
349         c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
350         bal.cleanupMounts()
351         c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
352         bal.try(c, tester{
353                 known:      0,
354                 desired:    map[string]int{"default": 2},
355                 current:    slots{1},
356                 shouldPull: slots{2}})
357 }
358
359 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
360         bal.srvs[0].mounts[0].KeepMount.Replication = 2  // srv 0
361         bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
362         bal.cleanupMounts()
363         // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
364         bal.try(c, tester{
365                 known:      0,
366                 desired:    map[string]int{"default": 2},
367                 current:    slots{1},
368                 shouldPull: slots{0},
369                 expectBlockState: &balancedBlockState{
370                         needed:  1,
371                         pulling: 1,
372                 }})
373         bal.try(c, tester{
374                 known:      0,
375                 desired:    map[string]int{"default": 2},
376                 current:    slots{0, 1},
377                 shouldPull: nil,
378                 expectBlockState: &balancedBlockState{
379                         needed: 2,
380                 }})
381         bal.try(c, tester{
382                 known:       0,
383                 desired:     map[string]int{"default": 2},
384                 current:     slots{0, 1, 2},
385                 shouldTrash: slots{2},
386                 expectBlockState: &balancedBlockState{
387                         needed:   2,
388                         unneeded: 1,
389                 }})
390         bal.try(c, tester{
391                 known:       0,
392                 desired:     map[string]int{"default": 3},
393                 current:     slots{0, 2, 3, 4},
394                 shouldPull:  slots{1},
395                 shouldTrash: slots{4},
396                 expectBlockState: &balancedBlockState{
397                         needed:   3,
398                         unneeded: 1,
399                         pulling:  1,
400                 }})
401         bal.try(c, tester{
402                 known:       0,
403                 desired:     map[string]int{"default": 3},
404                 current:     slots{0, 1, 2, 3, 4},
405                 shouldTrash: slots{2, 3, 4},
406                 expectBlockState: &balancedBlockState{
407                         needed:   2,
408                         unneeded: 3,
409                 }})
410         bal.try(c, tester{
411                 known:       0,
412                 desired:     map[string]int{"default": 4},
413                 current:     slots{0, 1, 2, 3, 4},
414                 shouldTrash: slots{3, 4},
415                 expectBlockState: &balancedBlockState{
416                         needed:   3,
417                         unneeded: 2,
418                 }})
419         // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
420         bal.try(c, tester{
421                 known:   1,
422                 desired: map[string]int{"default": 2},
423                 current: slots{0},
424                 expectBlockState: &balancedBlockState{
425                         needed: 1,
426                 }})
427         bal.try(c, tester{
428                 known:      1,
429                 desired:    map[string]int{"default": 3},
430                 current:    slots{0},
431                 shouldPull: slots{1},
432                 expectBlockState: &balancedBlockState{
433                         needed:  1,
434                         pulling: 1,
435                 }})
436         bal.try(c, tester{
437                 known:      1,
438                 desired:    map[string]int{"default": 4},
439                 current:    slots{0},
440                 shouldPull: slots{1, 2},
441                 expectBlockState: &balancedBlockState{
442                         needed:  1,
443                         pulling: 2,
444                 }})
445         bal.try(c, tester{
446                 known:      1,
447                 desired:    map[string]int{"default": 4},
448                 current:    slots{2},
449                 shouldPull: slots{0, 1},
450                 expectBlockState: &balancedBlockState{
451                         needed:  1,
452                         pulling: 2,
453                 }})
454         bal.try(c, tester{
455                 known:      1,
456                 desired:    map[string]int{"default": 4},
457                 current:    slots{7},
458                 shouldPull: slots{0, 1, 2},
459                 expectBlockState: &balancedBlockState{
460                         needed:  1,
461                         pulling: 3,
462                 }})
463         bal.try(c, tester{
464                 known:       1,
465                 desired:     map[string]int{"default": 2},
466                 current:     slots{1, 2, 3, 4},
467                 shouldPull:  slots{0},
468                 shouldTrash: slots{3, 4},
469                 expectBlockState: &balancedBlockState{
470                         needed:   2,
471                         unneeded: 2,
472                         pulling:  1,
473                 }})
474         bal.try(c, tester{
475                 known:       1,
476                 desired:     map[string]int{"default": 2},
477                 current:     slots{0, 1, 2},
478                 shouldTrash: slots{1, 2},
479                 expectBlockState: &balancedBlockState{
480                         needed:   1,
481                         unneeded: 2,
482                 }})
483 }
484
485 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
486         bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
487         bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
488         bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
489         // block 0 belongs on servers 3 and e, which have different
490         // device IDs.
491         bal.try(c, tester{
492                 known:      0,
493                 desired:    map[string]int{"default": 2},
494                 current:    slots{1},
495                 shouldPull: slots{0}})
496         // block 1 belongs on servers 0 and 9, which both report
497         // having a replica, but the replicas are on the same device
498         // ID -- so we should pull to the third position (7).
499         bal.try(c, tester{
500                 known:      1,
501                 desired:    map[string]int{"default": 2},
502                 current:    slots{0, 1},
503                 shouldPull: slots{2}})
504         // block 1 can be pulled to the doubly-mounted device, but the
505         // pull should only be done on the first of the two servers.
506         bal.try(c, tester{
507                 known:      1,
508                 desired:    map[string]int{"default": 2},
509                 current:    slots{2},
510                 shouldPull: slots{0}})
511         // block 0 has one replica on a single device mounted on two
512         // servers (e,9 at positions 1,9). Trashing the replica on 9
513         // would lose the block.
514         bal.try(c, tester{
515                 known:      0,
516                 desired:    map[string]int{"default": 2},
517                 current:    slots{1, 9},
518                 shouldPull: slots{0},
519                 expectBlockState: &balancedBlockState{
520                         needed:  1,
521                         pulling: 1,
522                 }})
523         // block 0 is overreplicated, but the second and third
524         // replicas are the same replica according to DeviceID
525         // (despite different Mtimes). Don't trash the third replica.
526         bal.try(c, tester{
527                 known:   0,
528                 desired: map[string]int{"default": 2},
529                 current: slots{0, 1, 9},
530                 expectBlockState: &balancedBlockState{
531                         needed: 2,
532                 }})
533         // block 0 is overreplicated; the third and fifth replicas are
534         // extra, but the fourth is another view of the second and
535         // shouldn't be trashed.
536         bal.try(c, tester{
537                 known:       0,
538                 desired:     map[string]int{"default": 2},
539                 current:     slots{0, 1, 5, 9, 12},
540                 shouldTrash: slots{5, 12},
541                 expectBlockState: &balancedBlockState{
542                         needed:   2,
543                         unneeded: 2,
544                 }})
545 }
546
547 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
548         // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
549         // probe order. For these tests we give it two mounts, one
550         // with classes=[special], one with
551         // classes=[special,special2].
552         bal.srvs[9].mounts = []*KeepMount{{
553                 KeepMount: arvados.KeepMount{
554                         Replication:    1,
555                         StorageClasses: map[string]bool{"special": true},
556                         UUID:           "zzzzz-mount-special00000009",
557                         DeviceID:       "9-special",
558                 },
559                 KeepService: bal.srvs[9],
560         }, {
561                 KeepMount: arvados.KeepMount{
562                         Replication:    1,
563                         StorageClasses: map[string]bool{"special": true, "special2": true},
564                         UUID:           "zzzzz-mount-special20000009",
565                         DeviceID:       "9-special-and-special2",
566                 },
567                 KeepService: bal.srvs[9],
568         }}
569         // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
570         // probe order. We give it two mounts, one with
571         // classes=[special3], one with classes=[default].
572         bal.srvs[13].mounts = []*KeepMount{{
573                 KeepMount: arvados.KeepMount{
574                         Replication:    1,
575                         StorageClasses: map[string]bool{"special2": true},
576                         UUID:           "zzzzz-mount-special2000000d",
577                         DeviceID:       "13-special2",
578                 },
579                 KeepService: bal.srvs[13],
580         }, {
581                 KeepMount: arvados.KeepMount{
582                         Replication:    1,
583                         StorageClasses: map[string]bool{"default": true},
584                         UUID:           "zzzzz-mount-00000000000000d",
585                         DeviceID:       "13-default",
586                 },
587                 KeepService: bal.srvs[13],
588         }}
589         // Pull to slot 9 because that's the only server with the
590         // desired class "special".
591         bal.try(c, tester{
592                 known:            0,
593                 desired:          map[string]int{"default": 2, "special": 1},
594                 current:          slots{0, 1},
595                 shouldPull:       slots{9},
596                 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
597         // If some storage classes are not satisfied, don't trash any
598         // excess replicas. (E.g., if someone desires repl=1 on
599         // class=durable, and we have two copies on class=volatile, we
600         // should wait for pull to succeed before trashing anything).
601         bal.try(c, tester{
602                 known:            0,
603                 desired:          map[string]int{"special": 1},
604                 current:          slots{0, 1},
605                 shouldPull:       slots{9},
606                 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
607         // Once storage classes are satisfied, trash excess replicas
608         // that appear earlier in probe order but aren't needed to
609         // satisfy the desired classes.
610         bal.try(c, tester{
611                 known:       0,
612                 desired:     map[string]int{"special": 1},
613                 current:     slots{0, 1, 9},
614                 shouldTrash: slots{0, 1}})
615         // Pull to slot 5, the best server with class "special2".
616         bal.try(c, tester{
617                 known:            0,
618                 desired:          map[string]int{"special2": 1},
619                 current:          slots{0, 1},
620                 shouldPull:       slots{5},
621                 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
622         // Pull to slot 5 and 9 to get replication 2 in desired class
623         // "special2".
624         bal.try(c, tester{
625                 known:            0,
626                 desired:          map[string]int{"special2": 2},
627                 current:          slots{0, 1},
628                 shouldPull:       slots{5, 9},
629                 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
630         // Slot 0 has a replica in "default", slot 1 has a replica
631         // in "special"; we need another replica in "default", i.e.,
632         // on slot 2.
633         bal.try(c, tester{
634                 known:      1,
635                 desired:    map[string]int{"default": 2, "special": 1},
636                 current:    slots{0, 1},
637                 shouldPull: slots{2}})
638         // Pull to best probe position 0 (despite wrong storage class)
639         // if it's impossible to achieve desired replication in the
640         // desired class (only slots 1 and 3 have special2).
641         bal.try(c, tester{
642                 known:      1,
643                 desired:    map[string]int{"special2": 3},
644                 current:    slots{3},
645                 shouldPull: slots{0, 1}})
646         // Trash excess replica.
647         bal.try(c, tester{
648                 known:       3,
649                 desired:     map[string]int{"special": 1},
650                 current:     slots{0, 1},
651                 shouldTrash: slots{1}})
652         // Leave one copy on slot 1 because slot 0 (server 9) only
653         // gives us repl=1.
654         bal.try(c, tester{
655                 known:   3,
656                 desired: map[string]int{"special": 2},
657                 current: slots{0, 1}})
658 }
659
660 // Clear all servers' changesets, balance a single block, and verify
661 // the appropriate changes for that block have been added to the
662 // changesets.
663 func (bal *balancerSuite) try(c *check.C, t tester) {
664         bal.setupLookupTables()
665         blk := &BlockState{
666                 Replicas: bal.replList(t.known, t.current),
667                 Desired:  t.desired,
668         }
669         for i, t := range t.timestamps {
670                 blk.Replicas[i].Mtime = t
671         }
672         for _, srv := range bal.srvs {
673                 srv.ChangeSet = &ChangeSet{}
674         }
675         result := bal.balanceBlock(knownBlkid(t.known), blk)
676
677         var didPull, didTrash slots
678         var didPullMounts, didTrashMounts []string
679         for i, srv := range bal.srvs {
680                 var slot int
681                 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
682                         if srvNum == i {
683                                 slot = probeOrder
684                         }
685                 }
686                 for _, pull := range srv.Pulls {
687                         didPull = append(didPull, slot)
688                         didPullMounts = append(didPullMounts, pull.To.UUID)
689                         c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
690                 }
691                 for _, trash := range srv.Trashes {
692                         didTrash = append(didTrash, slot)
693                         didTrashMounts = append(didTrashMounts, trash.From.UUID)
694                         c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
695                 }
696         }
697
698         for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
699                 sort.Sort(sort.IntSlice(list))
700         }
701         c.Check(didPull, check.DeepEquals, t.shouldPull)
702         c.Check(didTrash, check.DeepEquals, t.shouldTrash)
703         if t.shouldPullMounts != nil {
704                 sort.Strings(didPullMounts)
705                 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
706         }
707         if t.shouldTrashMounts != nil {
708                 sort.Strings(didTrashMounts)
709                 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
710         }
711         if t.expectBlockState != nil {
712                 c.Check(result.blockState, check.Equals, *t.expectBlockState)
713         }
714         if t.expectClassState != nil {
715                 c.Check(result.classState, check.DeepEquals, t.expectClassState)
716         }
717 }
718
719 // srvList returns the KeepServices, sorted in rendezvous order and
720 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
721 // returns the first-, second-, and fifth-best servers for storing
722 // bal.knownBlkid(3).
723 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
724         for _, i := range order {
725                 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
726         }
727         return
728 }
729
730 // replList is like srvList but returns an "existing replicas" slice,
731 // suitable for a BlockState test fixture.
732 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
733         nextMnt := map[*KeepService]int{}
734         mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
735         for _, srv := range bal.srvList(knownBlockID, order) {
736                 // round-robin repls onto each srv's mounts
737                 n := nextMnt[srv]
738                 nextMnt[srv] = (n + 1) % len(srv.mounts)
739
740                 repls = append(repls, Replica{srv.mounts[n], mtime})
741                 mtime++
742         }
743         return
744 }
745
746 // generate the same data hashes that are tested in
747 // sdk/go/keepclient/root_sorter_test.go
748 func knownBlkid(i int) arvados.SizedDigest {
749         return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))
750 }