Merge branch '20457-logs-and-mem-usage'
[arvados.git] / services / keep-balance / balance_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepbalance
6
7 import (
8         "crypto/md5"
9         "fmt"
10         "sort"
11         "strconv"
12         "testing"
13         "time"
14
15         "git.arvados.org/arvados.git/sdk/go/arvados"
16         "git.arvados.org/arvados.git/sdk/go/ctxlog"
17         check "gopkg.in/check.v1"
18 )
19
// Test is the "go test" entry point: it hands control to gocheck,
// which runs every suite registered with check.Suite in this package.
func Test(t *testing.T) {
	check.TestingT(t)
}
24
// Register balancerSuite with gocheck so Test runs its methods.
var _ = check.Suite(&balancerSuite{})
26
// balancerSuite is the gocheck fixture for single-block balancing
// tests. It embeds the Balancer under test so test methods can call
// its methods and set its fields directly.
type balancerSuite struct {
	Balancer
	// srvs holds the fake keep services created by SetUpTest,
	// indexed by server number.
	srvs []*KeepService
	// blks is not referenced by the tests visible in this file.
	blks map[string]tester
	// knownRendezvous[k][slot] is the server number found at
	// probe-order position slot for known block k (see
	// SetUpSuite).
	knownRendezvous [][]int
	// signatureTTL is in seconds; SetUpTest multiplies it by 1e9
	// to derive MinMtime in nanoseconds.
	signatureTTL int64
}
34
const (
	// known0 is an index into knownRendezvous, selecting the
	// first known block's probe order.
	known0 = 0
)
39
// slots is a list of probe-order positions (indexes into one entry
// of knownRendezvous), not server numbers.
type slots []int
41
// tester describes one balancing scenario for (*balancerSuite).try:
// the block's starting state and the changes expected afterwards.
type tester struct {
	// known selects the block: it indexes knownRendezvous and is
	// passed to knownBlkid.
	known int
	// desired is copied to BlockState.Desired; the tests key it
	// by storage class name (presumably mapping to the desired
	// replication level -- confirm against BlockState).
	desired map[string]int
	// current lists the probe-order slots that already hold a
	// replica. A slot may repeat to place several replicas on
	// one server's mounts (see replList).
	current slots
	// timestamps, if set, overrides the Mtime of the replicas at
	// the same indexes in current.
	timestamps []int64
	// shouldPull and shouldTrash are the probe-order slots
	// expected to receive pull and trash requests. try sorts
	// them before comparing.
	shouldPull  slots
	shouldTrash slots

	// shouldPullMounts and shouldTrashMounts are the expected
	// mount UUIDs of the pull targets and trash sources. They
	// are checked only when non-nil, and must already be in
	// sorted order because try sorts only the actual values.
	shouldPullMounts  []string
	shouldTrashMounts []string

	// expectBlockState and expectClassState, when non-nil, are
	// compared against the result returned by balanceBlock.
	expectBlockState *balancedBlockState
	expectClassState map[string]balancedBlockState
}
56
57 func (bal *balancerSuite) SetUpSuite(c *check.C) {
58         bal.knownRendezvous = nil
59         for _, str := range []string{
60                 "3eab2d5fc9681074",
61                 "097dba52e648f1c3",
62                 "c5b4e023f8a7d691",
63                 "9d81c02e76a3bf54",
64         } {
65                 var slots []int
66                 for _, c := range []byte(str) {
67                         pos, _ := strconv.ParseUint(string(c), 16, 4)
68                         slots = append(slots, int(pos))
69                 }
70                 bal.knownRendezvous = append(bal.knownRendezvous, slots)
71         }
72
73         bal.signatureTTL = 3600
74         bal.Logger = ctxlog.TestLogger(c)
75 }
76
77 func (bal *balancerSuite) SetUpTest(c *check.C) {
78         bal.srvs = make([]*KeepService, 16)
79         bal.KeepServices = make(map[string]*KeepService)
80         for i := range bal.srvs {
81                 srv := &KeepService{
82                         KeepService: arvados.KeepService{
83                                 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
84                         },
85                 }
86                 srv.mounts = []*KeepMount{{
87                         KeepMount: arvados.KeepMount{
88                                 UUID:           fmt.Sprintf("zzzzz-mount-%015x", i),
89                                 StorageClasses: map[string]bool{"default": true},
90                         },
91                         KeepService: srv,
92                 }}
93                 bal.srvs[i] = srv
94                 bal.KeepServices[srv.UUID] = srv
95         }
96
97         bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
98         bal.cleanupMounts()
99 }
100
101 func (bal *balancerSuite) TestPerfect(c *check.C) {
102         bal.try(c, tester{
103                 desired:     map[string]int{"default": 2},
104                 current:     slots{0, 1},
105                 shouldPull:  nil,
106                 shouldTrash: nil,
107                 expectBlockState: &balancedBlockState{
108                         needed: 2,
109                 }})
110 }
111
112 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
113         bal.try(c, tester{
114                 desired:     map[string]int{"default": 2},
115                 current:     slots{0, 2, 1},
116                 shouldTrash: slots{2},
117                 expectBlockState: &balancedBlockState{
118                         needed:   2,
119                         unneeded: 1,
120                 }})
121 }
122
123 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
124         bal.try(c, tester{
125                 desired:     map[string]int{"default": 0},
126                 current:     slots{0, 1, 3},
127                 shouldTrash: slots{0, 1, 3},
128                 expectBlockState: &balancedBlockState{
129                         unneeded: 3,
130                 }})
131 }
132
133 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
134         bal.try(c, tester{
135                 desired:    map[string]int{"default": 4},
136                 current:    slots{0, 1},
137                 shouldPull: slots{2, 3},
138                 expectBlockState: &balancedBlockState{
139                         needed:  2,
140                         pulling: 2,
141                 }})
142 }
143
144 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
145         bal.srvList(0, slots{3})[0].ReadOnly = true
146         bal.try(c, tester{
147                 desired:    map[string]int{"default": 4},
148                 current:    slots{0, 1},
149                 shouldPull: slots{2, 4},
150                 expectBlockState: &balancedBlockState{
151                         needed:  2,
152                         pulling: 2,
153                 }})
154 }
155
// TestMultipleViewsReadOnly runs the shared multiple-views scenario
// with each server's added second mount marked read-only.
func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
	bal.testMultipleViews(c, true)
}
159
// TestMultipleViews runs the shared multiple-views scenario with
// each server's added second mount writable.
func (bal *balancerSuite) TestMultipleViews(c *check.C) {
	bal.testMultipleViews(c, false)
}
163
164 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
165         for i, srv := range bal.srvs {
166                 // Add a mount to each service
167                 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
168                 srv.mounts = append(srv.mounts, &KeepMount{
169                         KeepMount: arvados.KeepMount{
170                                 DeviceID:       bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID,
171                                 UUID:           bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID,
172                                 ReadOnly:       readonly,
173                                 Replication:    1,
174                                 StorageClasses: map[string]bool{"default": true},
175                         },
176                         KeepService: srv,
177                 })
178         }
179         for i := 1; i < len(bal.srvs); i++ {
180                 c.Logf("i=%d", i)
181                 if i == 4 {
182                         // Timestamps are all different, but one of
183                         // the mounts on srv[4] has the same device ID
184                         // where the non-deletable replica is stored
185                         // on srv[3], so only one replica is safe to
186                         // trash.
187                         bal.try(c, tester{
188                                 desired:     map[string]int{"default": 1},
189                                 current:     slots{0, i, i},
190                                 shouldTrash: slots{i}})
191                 } else if readonly {
192                         // Timestamps are all different, and the third
193                         // replica can't be trashed because it's on a
194                         // read-only mount, so the first two replicas
195                         // should be trashed.
196                         bal.try(c, tester{
197                                 desired:     map[string]int{"default": 1},
198                                 current:     slots{0, i, i},
199                                 shouldTrash: slots{0, i}})
200                 } else {
201                         // Timestamps are all different, so both
202                         // replicas on the non-optimal server should
203                         // be trashed.
204                         bal.try(c, tester{
205                                 desired:     map[string]int{"default": 1},
206                                 current:     slots{0, i, i},
207                                 shouldTrash: slots{i, i}})
208                 }
209                 // If the three replicas have identical timestamps,
210                 // none of them can be trashed safely.
211                 bal.try(c, tester{
212                         desired:    map[string]int{"default": 1},
213                         current:    slots{0, i, i},
214                         timestamps: []int64{12345678, 12345678, 12345678}})
215                 // If the first and third replicas have identical
216                 // timestamps, only the second replica should be
217                 // trashed.
218                 bal.try(c, tester{
219                         desired:     map[string]int{"default": 1},
220                         current:     slots{0, i, i},
221                         timestamps:  []int64{12345678, 12345679, 12345678},
222                         shouldTrash: slots{i}})
223         }
224 }
225
226 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
227         bal.try(c, tester{
228                 desired:    map[string]int{"default": 2},
229                 current:    slots{2, 0},
230                 shouldPull: slots{1}})
231         bal.try(c, tester{
232                 desired:    map[string]int{"default": 2},
233                 current:    slots{2, 7},
234                 shouldPull: slots{0, 1}})
235         // if only one of the pulls succeeds, we'll see this next:
236         bal.try(c, tester{
237                 desired:     map[string]int{"default": 2},
238                 current:     slots{2, 1, 7},
239                 shouldPull:  slots{0},
240                 shouldTrash: slots{7}})
241         // if both pulls succeed, we'll see this next:
242         bal.try(c, tester{
243                 desired:     map[string]int{"default": 2},
244                 current:     slots{2, 0, 1, 7},
245                 shouldTrash: slots{2, 7}})
246
247         // unbalanced + excessive replication => pull + trash
248         bal.try(c, tester{
249                 desired:     map[string]int{"default": 2},
250                 current:     slots{2, 5, 7},
251                 shouldPull:  slots{0, 1},
252                 shouldTrash: slots{7}})
253 }
254
255 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
256         for s, srv := range bal.srvs {
257                 for i := 0; i < 3; i++ {
258                         m := *(srv.mounts[0])
259                         m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
260                         srv.mounts = append(srv.mounts, &m)
261                 }
262         }
263         bal.try(c, tester{
264                 desired:    map[string]int{"default": 2},
265                 current:    slots{0, 0},
266                 shouldPull: slots{1}})
267         bal.try(c, tester{
268                 desired:    map[string]int{"default": 2},
269                 current:    slots{2, 2},
270                 shouldPull: slots{0, 1}})
271         bal.try(c, tester{
272                 desired:     map[string]int{"default": 2},
273                 current:     slots{0, 0, 1},
274                 shouldTrash: slots{0}})
275         bal.try(c, tester{
276                 desired:     map[string]int{"default": 2},
277                 current:     slots{1, 1, 0},
278                 shouldTrash: slots{1}})
279         bal.try(c, tester{
280                 desired:     map[string]int{"default": 2},
281                 current:     slots{1, 0, 1, 0, 2},
282                 shouldTrash: slots{0, 1, 2}})
283         bal.try(c, tester{
284                 desired:     map[string]int{"default": 2},
285                 current:     slots{1, 1, 1, 0, 2},
286                 shouldTrash: slots{1, 1, 2}})
287         bal.try(c, tester{
288                 desired:     map[string]int{"default": 2},
289                 current:     slots{1, 1, 2},
290                 shouldPull:  slots{0},
291                 shouldTrash: slots{1}})
292         bal.try(c, tester{
293                 desired:     map[string]int{"default": 2},
294                 current:     slots{1, 1, 0},
295                 timestamps:  []int64{12345678, 12345678, 12345679},
296                 shouldTrash: nil})
297         bal.try(c, tester{
298                 desired:    map[string]int{"default": 2},
299                 current:    slots{1, 1},
300                 shouldPull: slots{0}})
301 }
302
303 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
304         // For purposes of increasing replication, we assume identical
305         // replicas are distinct.
306         bal.try(c, tester{
307                 desired:    map[string]int{"default": 4},
308                 current:    slots{0, 1},
309                 timestamps: []int64{12345678, 12345678},
310                 shouldPull: slots{2, 3}})
311 }
312
313 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
314         // For purposes of decreasing replication, we assume identical
315         // replicas are NOT distinct.
316         bal.try(c, tester{
317                 desired:    map[string]int{"default": 2},
318                 current:    slots{0, 1, 2},
319                 timestamps: []int64{12345678, 12345678, 12345678}})
320         bal.try(c, tester{
321                 desired:    map[string]int{"default": 2},
322                 current:    slots{0, 1, 2},
323                 timestamps: []int64{12345678, 10000000, 10000000}})
324         bal.try(c, tester{
325                 desired:     map[string]int{"default": 0},
326                 current:     slots{0, 1, 2},
327                 timestamps:  []int64{12345678, 12345678, 12345678},
328                 shouldTrash: slots{0},
329                 shouldTrashMounts: []string{
330                         bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
331         bal.try(c, tester{
332                 desired:     map[string]int{"default": 2},
333                 current:     slots{0, 1, 2, 5, 6},
334                 timestamps:  []int64{12345678, 12345679, 10000000, 10000000, 10000000},
335                 shouldTrash: slots{2},
336                 shouldTrashMounts: []string{
337                         bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
338         bal.try(c, tester{
339                 desired:     map[string]int{"default": 2},
340                 current:     slots{0, 1, 2, 5, 6},
341                 timestamps:  []int64{12345678, 12345679, 12345671, 10000000, 10000000},
342                 shouldTrash: slots{2, 5},
343                 shouldTrashMounts: []string{
344                         bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
345                         bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
346         bal.try(c, tester{
347                 desired:     map[string]int{"default": 2},
348                 current:     slots{0, 1, 2, 5, 6},
349                 timestamps:  []int64{12345678, 12345679, 12345679, 10000000, 10000000},
350                 shouldTrash: slots{5},
351                 shouldTrashMounts: []string{
352                         bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
353 }
354
355 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
356         oldTime := bal.MinMtime - 3600
357         newTime := bal.MinMtime + 3600
358         // The excess replica is too new to delete.
359         bal.try(c, tester{
360                 desired:    map[string]int{"default": 2},
361                 current:    slots{0, 1, 2},
362                 timestamps: []int64{oldTime, newTime, newTime + 1},
363                 expectBlockState: &balancedBlockState{
364                         needed:   2,
365                         unneeded: 1,
366                 }})
367         // The best replicas are too new to delete, but the excess
368         // replica is old enough.
369         bal.try(c, tester{
370                 desired:     map[string]int{"default": 2},
371                 current:     slots{0, 1, 2},
372                 timestamps:  []int64{newTime, newTime + 1, oldTime},
373                 shouldTrash: slots{2}})
374 }
375
376 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
377         bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
378         bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
379         bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID
380         bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
381         c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
382         bal.cleanupMounts()
383         c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
384         bal.try(c, tester{
385                 known:      0,
386                 desired:    map[string]int{"default": 2},
387                 current:    slots{1},
388                 shouldPull: slots{2}})
389 }
390
391 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
392         bal.srvs[0].mounts[0].KeepMount.Replication = 2  // srv 0
393         bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
394         bal.cleanupMounts()
395         // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
396         bal.try(c, tester{
397                 known:      0,
398                 desired:    map[string]int{"default": 2},
399                 current:    slots{1},
400                 shouldPull: slots{0},
401                 expectBlockState: &balancedBlockState{
402                         needed:  1,
403                         pulling: 1,
404                 }})
405         bal.try(c, tester{
406                 known:      0,
407                 desired:    map[string]int{"default": 2},
408                 current:    slots{0, 1},
409                 shouldPull: nil,
410                 expectBlockState: &balancedBlockState{
411                         needed: 2,
412                 }})
413         bal.try(c, tester{
414                 known:       0,
415                 desired:     map[string]int{"default": 2},
416                 current:     slots{0, 1, 2},
417                 shouldTrash: slots{2},
418                 expectBlockState: &balancedBlockState{
419                         needed:   2,
420                         unneeded: 1,
421                 }})
422         bal.try(c, tester{
423                 known:       0,
424                 desired:     map[string]int{"default": 3},
425                 current:     slots{0, 2, 3, 4},
426                 shouldPull:  slots{1},
427                 shouldTrash: slots{4},
428                 expectBlockState: &balancedBlockState{
429                         needed:   3,
430                         unneeded: 1,
431                         pulling:  1,
432                 }})
433         bal.try(c, tester{
434                 known:       0,
435                 desired:     map[string]int{"default": 3},
436                 current:     slots{0, 1, 2, 3, 4},
437                 shouldTrash: slots{2, 3, 4},
438                 expectBlockState: &balancedBlockState{
439                         needed:   2,
440                         unneeded: 3,
441                 }})
442         bal.try(c, tester{
443                 known:       0,
444                 desired:     map[string]int{"default": 4},
445                 current:     slots{0, 1, 2, 3, 4},
446                 shouldTrash: slots{3, 4},
447                 expectBlockState: &balancedBlockState{
448                         needed:   3,
449                         unneeded: 2,
450                 }})
451         // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
452         bal.try(c, tester{
453                 known:   1,
454                 desired: map[string]int{"default": 2},
455                 current: slots{0},
456                 expectBlockState: &balancedBlockState{
457                         needed: 1,
458                 }})
459         bal.try(c, tester{
460                 known:      1,
461                 desired:    map[string]int{"default": 3},
462                 current:    slots{0},
463                 shouldPull: slots{1},
464                 expectBlockState: &balancedBlockState{
465                         needed:  1,
466                         pulling: 1,
467                 }})
468         bal.try(c, tester{
469                 known:      1,
470                 desired:    map[string]int{"default": 4},
471                 current:    slots{0},
472                 shouldPull: slots{1, 2},
473                 expectBlockState: &balancedBlockState{
474                         needed:  1,
475                         pulling: 2,
476                 }})
477         bal.try(c, tester{
478                 known:      1,
479                 desired:    map[string]int{"default": 4},
480                 current:    slots{2},
481                 shouldPull: slots{0, 1},
482                 expectBlockState: &balancedBlockState{
483                         needed:  1,
484                         pulling: 2,
485                 }})
486         bal.try(c, tester{
487                 known:      1,
488                 desired:    map[string]int{"default": 4},
489                 current:    slots{7},
490                 shouldPull: slots{0, 1, 2},
491                 expectBlockState: &balancedBlockState{
492                         needed:  1,
493                         pulling: 3,
494                 }})
495         bal.try(c, tester{
496                 known:       1,
497                 desired:     map[string]int{"default": 2},
498                 current:     slots{1, 2, 3, 4},
499                 shouldPull:  slots{0},
500                 shouldTrash: slots{3, 4},
501                 expectBlockState: &balancedBlockState{
502                         needed:   2,
503                         unneeded: 2,
504                         pulling:  1,
505                 }})
506         bal.try(c, tester{
507                 known:       1,
508                 desired:     map[string]int{"default": 2},
509                 current:     slots{0, 1, 2},
510                 shouldTrash: slots{1, 2},
511                 expectBlockState: &balancedBlockState{
512                         needed:   1,
513                         unneeded: 2,
514                 }})
515 }
516
517 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
518         dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID
519         bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID
520         bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID
521         // block 0 belongs on servers 3 and e, which have different
522         // UUIDs.
523         bal.try(c, tester{
524                 known:      0,
525                 desired:    map[string]int{"default": 2},
526                 current:    slots{1},
527                 shouldPull: slots{0}})
528         // block 1 belongs on servers 0 and 9, which both report
529         // having a replica, but the replicas are on the same volume
530         // -- so we should pull to the third position (7).
531         bal.try(c, tester{
532                 known:      1,
533                 desired:    map[string]int{"default": 2},
534                 current:    slots{0, 1},
535                 shouldPull: slots{2}})
536         // block 1 can be pulled to the doubly-mounted volume, but the
537         // pull should only be done on the first of the two servers.
538         bal.try(c, tester{
539                 known:      1,
540                 desired:    map[string]int{"default": 2},
541                 current:    slots{2},
542                 shouldPull: slots{0}})
543         // block 0 has one replica on a single volume mounted on two
544         // servers (e,9 at positions 1,9). Trashing the replica on 9
545         // would lose the block.
546         bal.try(c, tester{
547                 known:      0,
548                 desired:    map[string]int{"default": 2},
549                 current:    slots{1, 9},
550                 shouldPull: slots{0},
551                 expectBlockState: &balancedBlockState{
552                         needed:  1,
553                         pulling: 1,
554                 }})
555         // block 0 is overreplicated, but the second and third
556         // replicas are the same replica according to volume UUID
557         // (despite different Mtimes). Don't trash the third replica.
558         bal.try(c, tester{
559                 known:   0,
560                 desired: map[string]int{"default": 2},
561                 current: slots{0, 1, 9},
562                 expectBlockState: &balancedBlockState{
563                         needed: 2,
564                 }})
565         // block 0 is overreplicated; the third and fifth replicas are
566         // extra, but the fourth is another view of the second and
567         // shouldn't be trashed.
568         bal.try(c, tester{
569                 known:       0,
570                 desired:     map[string]int{"default": 2},
571                 current:     slots{0, 1, 5, 9, 12},
572                 shouldTrash: slots{5, 12},
573                 expectBlockState: &balancedBlockState{
574                         needed:   2,
575                         unneeded: 2,
576                 }})
577 }
578
// TestChangeStorageClasses replaces the mounts on servers 9 and 13
// with mounts in non-default storage classes, then checks which
// pulls and trashes result when a block's desired storage classes
// differ from the classes of the mounts currently holding it.
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
	// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
	// probe order. For these tests we give it two mounts, one
	// with classes=[special], one with
	// classes=[special,special2].
	bal.srvs[9].mounts = []*KeepMount{{
		KeepMount: arvados.KeepMount{
			Replication:    1,
			StorageClasses: map[string]bool{"special": true},
			UUID:           "zzzzz-mount-special00000009",
			DeviceID:       "9-special",
		},
		KeepService: bal.srvs[9],
	}, {
		KeepMount: arvados.KeepMount{
			Replication:    1,
			StorageClasses: map[string]bool{"special": true, "special2": true},
			UUID:           "zzzzz-mount-special20000009",
			DeviceID:       "9-special-and-special2",
		},
		KeepService: bal.srvs[9],
	}}
	// For known blocks 0/1/2/3, server 13 (d) is slot 5/3/12/1 in
	// probe order. We give it two mounts, one with
	// classes=[special2], one with classes=[default].
	bal.srvs[13].mounts = []*KeepMount{{
		KeepMount: arvados.KeepMount{
			Replication:    1,
			StorageClasses: map[string]bool{"special2": true},
			UUID:           "zzzzz-mount-special2000000d",
			DeviceID:       "13-special2",
		},
		KeepService: bal.srvs[13],
	}, {
		KeepMount: arvados.KeepMount{
			Replication:    1,
			StorageClasses: map[string]bool{"default": true},
			UUID:           "zzzzz-mount-00000000000000d",
			DeviceID:       "13-default",
		},
		KeepService: bal.srvs[13],
	}}
	// Pull to slot 9 because that's the only server with the
	// desired class "special".
	bal.try(c, tester{
		known:            0,
		desired:          map[string]int{"default": 2, "special": 1},
		current:          slots{0, 1},
		shouldPull:       slots{9},
		shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
	// If some storage classes are not satisfied, don't trash any
	// excess replicas. (E.g., if someone desires repl=1 on
	// class=durable, and we have two copies on class=volatile, we
	// should wait for pull to succeed before trashing anything).
	bal.try(c, tester{
		known:            0,
		desired:          map[string]int{"special": 1},
		current:          slots{0, 1},
		shouldPull:       slots{9},
		shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
	// Once storage classes are satisfied, trash excess replicas
	// that appear earlier in probe order but aren't needed to
	// satisfy the desired classes.
	bal.try(c, tester{
		known:       0,
		desired:     map[string]int{"special": 1},
		current:     slots{0, 1, 9},
		shouldTrash: slots{0, 1}})
	// Pull to slot 5, the best server with class "special2".
	bal.try(c, tester{
		known:            0,
		desired:          map[string]int{"special2": 1},
		current:          slots{0, 1},
		shouldPull:       slots{5},
		shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
	// Pull to slot 5 and 9 to get replication 2 in desired class
	// "special2".
	bal.try(c, tester{
		known:            0,
		desired:          map[string]int{"special2": 2},
		current:          slots{0, 1},
		shouldPull:       slots{5, 9},
		shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
	// Slot 0 has a replica in "default", slot 1 has a replica
	// in "special"; we need another replica in "default", i.e.,
	// on slot 2.
	bal.try(c, tester{
		known:      1,
		desired:    map[string]int{"default": 2, "special": 1},
		current:    slots{0, 1},
		shouldPull: slots{2}})
	// Pull to best probe position 0 (despite wrong storage class)
	// if it's impossible to achieve desired replication in the
	// desired class (only slots 1 and 3 have special2).
	bal.try(c, tester{
		known:      1,
		desired:    map[string]int{"special2": 3},
		current:    slots{3},
		shouldPull: slots{0, 1}})
	// Trash excess replica.
	bal.try(c, tester{
		known:       3,
		desired:     map[string]int{"special": 1},
		current:     slots{0, 1},
		shouldTrash: slots{1}})
	// Leave one copy on slot 1 because slot 0 (server 9) only
	// gives us repl=1.
	bal.try(c, tester{
		known:   3,
		desired: map[string]int{"special": 2},
		current: slots{0, 1}})
}
691
692 // Clear all servers' changesets, balance a single block, and verify
693 // the appropriate changes for that block have been added to the
694 // changesets.
695 func (bal *balancerSuite) try(c *check.C, t tester) {
696         bal.setupLookupTables()
697         blk := &BlockState{
698                 Replicas: bal.replList(t.known, t.current),
699                 Desired:  t.desired,
700         }
701         for i, t := range t.timestamps {
702                 blk.Replicas[i].Mtime = t
703         }
704         for _, srv := range bal.srvs {
705                 srv.ChangeSet = &ChangeSet{}
706         }
707         result := bal.balanceBlock(knownBlkid(t.known), blk)
708
709         var didPull, didTrash slots
710         var didPullMounts, didTrashMounts []string
711         for i, srv := range bal.srvs {
712                 var slot int
713                 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
714                         if srvNum == i {
715                                 slot = probeOrder
716                         }
717                 }
718                 for _, pull := range srv.Pulls {
719                         didPull = append(didPull, slot)
720                         didPullMounts = append(didPullMounts, pull.To.UUID)
721                         c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
722                 }
723                 for _, trash := range srv.Trashes {
724                         didTrash = append(didTrash, slot)
725                         didTrashMounts = append(didTrashMounts, trash.From.UUID)
726                         c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
727                 }
728         }
729
730         for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
731                 sort.Sort(sort.IntSlice(list))
732         }
733         c.Check(didPull, check.DeepEquals, t.shouldPull)
734         c.Check(didTrash, check.DeepEquals, t.shouldTrash)
735         if t.shouldPullMounts != nil {
736                 sort.Strings(didPullMounts)
737                 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
738         }
739         if t.shouldTrashMounts != nil {
740                 sort.Strings(didTrashMounts)
741                 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
742         }
743         if t.expectBlockState != nil {
744                 c.Check(result.blockState, check.Equals, *t.expectBlockState)
745         }
746         if t.expectClassState != nil {
747                 c.Check(result.classState, check.DeepEquals, t.expectClassState)
748         }
749 }
750
751 // srvList returns the KeepServices, sorted in rendezvous order and
752 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
753 // returns the first-, second-, and fifth-best servers for storing
754 // bal.knownBlkid(3).
755 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
756         for _, i := range order {
757                 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
758         }
759         return
760 }
761
762 // replList is like srvList but returns an "existing replicas" slice,
763 // suitable for a BlockState test fixture.
764 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
765         nextMnt := map[*KeepService]int{}
766         mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
767         for _, srv := range bal.srvList(knownBlockID, order) {
768                 // round-robin repls onto each srv's mounts
769                 n := nextMnt[srv]
770                 nextMnt[srv] = (n + 1) % len(srv.mounts)
771
772                 repls = append(repls, Replica{srv.mounts[n], mtime})
773                 mtime++
774         }
775         return
776 }
777
778 // generate the same data hashes that are tested in
779 // sdk/go/keepclient/root_sorter_test.go
780 func knownBlkid(i int) arvados.SizedDigest {
781         return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))
782 }