1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 "git.arvados.org/arvados.git/sdk/go/ctxlog"
17 check "gopkg.in/check.v1"
21 func Test(t *testing.T) {
25 var _ = check.Suite(&balancerSuite{})
27 type balancerSuite struct {
30 blks map[string]tester
31 knownRendezvous [][]int
36 // index into knownRendezvous
44 desired map[string]int
50 shouldPullMounts []string
51 shouldTrashMounts []string
53 expectBlockState *balancedBlockState
54 expectClassState map[string]balancedBlockState
57 func (bal *balancerSuite) SetUpSuite(c *check.C) {
58 bal.knownRendezvous = nil
59 for _, str := range []string{
66 for _, c := range []byte(str) {
67 pos, _ := strconv.ParseUint(string(c), 16, 4)
68 slots = append(slots, int(pos))
70 bal.knownRendezvous = append(bal.knownRendezvous, slots)
73 bal.signatureTTL = 3600
74 bal.Logger = ctxlog.TestLogger(c)
77 func (bal *balancerSuite) SetUpTest(c *check.C) {
78 bal.srvs = make([]*KeepService, 16)
79 bal.KeepServices = make(map[string]*KeepService)
80 for i := range bal.srvs {
82 KeepService: arvados.KeepService{
83 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
86 srv.mounts = []*KeepMount{{
87 KeepMount: arvados.KeepMount{
88 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
89 StorageClasses: map[string]bool{"default": true},
94 bal.KeepServices[srv.UUID] = srv
97 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
101 func (bal *balancerSuite) TestPerfect(c *check.C) {
103 desired: map[string]int{"default": 2},
104 current: slots{0, 1},
107 expectBlockState: &balancedBlockState{
112 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
114 desired: map[string]int{"default": 2},
115 current: slots{0, 2, 1},
116 shouldTrash: slots{2},
117 expectBlockState: &balancedBlockState{
123 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
125 desired: map[string]int{"default": 0},
126 current: slots{0, 1, 3},
127 shouldTrash: slots{0, 1, 3},
128 expectBlockState: &balancedBlockState{
133 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
135 desired: map[string]int{"default": 4},
136 current: slots{0, 1},
137 shouldPull: slots{2, 3},
138 expectBlockState: &balancedBlockState{
144 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
145 bal.srvList(0, slots{3})[0].ReadOnly = true
147 desired: map[string]int{"default": 4},
148 current: slots{0, 1},
149 shouldPull: slots{2, 4},
150 expectBlockState: &balancedBlockState{
156 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
157 bal.testMultipleViews(c, true)
160 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
161 bal.testMultipleViews(c, false)
164 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
165 for i, srv := range bal.srvs {
166 // Add a mount to each service
167 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
168 srv.mounts = append(srv.mounts, &KeepMount{
169 KeepMount: arvados.KeepMount{
170 DeviceID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID,
171 UUID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID,
174 StorageClasses: map[string]bool{"default": true},
179 for i := 1; i < len(bal.srvs); i++ {
182 // Timestamps are all different, but one of
183 // the mounts on srv[4] has the same device ID
184 // where the non-deletable replica is stored
185 // on srv[3], so only one replica is safe to
188 desired: map[string]int{"default": 1},
189 current: slots{0, i, i},
190 shouldTrash: slots{i}})
192 // Timestamps are all different, and the third
193 // replica can't be trashed because it's on a
194 // read-only mount, so the first two replicas
195 // should be trashed.
197 desired: map[string]int{"default": 1},
198 current: slots{0, i, i},
199 shouldTrash: slots{0, i}})
201 // Timestamps are all different, so both
202 // replicas on the non-optimal server should
205 desired: map[string]int{"default": 1},
206 current: slots{0, i, i},
207 shouldTrash: slots{i, i}})
209 // If the three replicas have identical timestamps,
210 // none of them can be trashed safely.
212 desired: map[string]int{"default": 1},
213 current: slots{0, i, i},
214 timestamps: []int64{12345678, 12345678, 12345678}})
215 // If the first and third replicas have identical
216 // timestamps, only the second replica should be
219 desired: map[string]int{"default": 1},
220 current: slots{0, i, i},
221 timestamps: []int64{12345678, 12345679, 12345678},
222 shouldTrash: slots{i}})
226 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
228 desired: map[string]int{"default": 2},
229 current: slots{2, 0},
230 shouldPull: slots{1}})
232 desired: map[string]int{"default": 2},
233 current: slots{2, 7},
234 shouldPull: slots{0, 1}})
235 // if only one of the pulls succeeds, we'll see this next:
237 desired: map[string]int{"default": 2},
238 current: slots{2, 1, 7},
239 shouldPull: slots{0},
240 shouldTrash: slots{7}})
241 // if both pulls succeed, we'll see this next:
243 desired: map[string]int{"default": 2},
244 current: slots{2, 0, 1, 7},
245 shouldTrash: slots{2, 7}})
247 // unbalanced + excessive replication => pull + trash
249 desired: map[string]int{"default": 2},
250 current: slots{2, 5, 7},
251 shouldPull: slots{0, 1},
252 shouldTrash: slots{7}})
255 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
256 for s, srv := range bal.srvs {
257 for i := 0; i < 3; i++ {
258 m := *(srv.mounts[0])
259 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
260 srv.mounts = append(srv.mounts, &m)
264 desired: map[string]int{"default": 2},
265 current: slots{0, 0},
266 shouldPull: slots{1}})
268 desired: map[string]int{"default": 2},
269 current: slots{2, 2},
270 shouldPull: slots{0, 1}})
272 desired: map[string]int{"default": 2},
273 current: slots{0, 0, 1},
274 shouldTrash: slots{0}})
276 desired: map[string]int{"default": 2},
277 current: slots{1, 1, 0},
278 shouldTrash: slots{1}})
280 desired: map[string]int{"default": 2},
281 current: slots{1, 0, 1, 0, 2},
282 shouldTrash: slots{0, 1, 2}})
284 desired: map[string]int{"default": 2},
285 current: slots{1, 1, 1, 0, 2},
286 shouldTrash: slots{1, 1, 2}})
288 desired: map[string]int{"default": 2},
289 current: slots{1, 1, 2},
290 shouldPull: slots{0},
291 shouldTrash: slots{1}})
293 desired: map[string]int{"default": 2},
294 current: slots{1, 1, 0},
295 timestamps: []int64{12345678, 12345678, 12345679},
298 desired: map[string]int{"default": 2},
299 current: slots{1, 1},
300 shouldPull: slots{0}})
303 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
304 // For purposes of increasing replication, we assume identical
305 // replicas are distinct.
307 desired: map[string]int{"default": 4},
308 current: slots{0, 1},
309 timestamps: []int64{12345678, 12345678},
310 shouldPull: slots{2, 3}})
313 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
314 // For purposes of decreasing replication, we assume identical
315 // replicas are NOT distinct.
317 desired: map[string]int{"default": 2},
318 current: slots{0, 1, 2},
319 timestamps: []int64{12345678, 12345678, 12345678}})
321 desired: map[string]int{"default": 2},
322 current: slots{0, 1, 2},
323 timestamps: []int64{12345678, 10000000, 10000000}})
325 desired: map[string]int{"default": 0},
326 current: slots{0, 1, 2},
327 timestamps: []int64{12345678, 12345678, 12345678},
328 shouldTrash: slots{0},
329 shouldTrashMounts: []string{
330 bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
332 desired: map[string]int{"default": 2},
333 current: slots{0, 1, 2, 5, 6},
334 timestamps: []int64{12345678, 12345679, 10000000, 10000000, 10000000},
335 shouldTrash: slots{2},
336 shouldTrashMounts: []string{
337 bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
339 desired: map[string]int{"default": 2},
340 current: slots{0, 1, 2, 5, 6},
341 timestamps: []int64{12345678, 12345679, 12345671, 10000000, 10000000},
342 shouldTrash: slots{2, 5},
343 shouldTrashMounts: []string{
344 bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
345 bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
347 desired: map[string]int{"default": 2},
348 current: slots{0, 1, 2, 5, 6},
349 timestamps: []int64{12345678, 12345679, 12345679, 10000000, 10000000},
350 shouldTrash: slots{5},
351 shouldTrashMounts: []string{
352 bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
355 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
356 oldTime := bal.MinMtime - 3600
357 newTime := bal.MinMtime + 3600
358 // The excess replica is too new to delete.
360 desired: map[string]int{"default": 2},
361 current: slots{0, 1, 2},
362 timestamps: []int64{oldTime, newTime, newTime + 1},
363 expectBlockState: &balancedBlockState{
367 // The best replicas are too new to delete, but the excess
368 // replica is old enough.
370 desired: map[string]int{"default": 2},
371 current: slots{0, 1, 2},
372 timestamps: []int64{newTime, newTime + 1, oldTime},
373 shouldTrash: slots{2}})
376 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
377 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
378 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
379 bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID
380 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
381 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
383 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
386 desired: map[string]int{"default": 2},
388 shouldPull: slots{2}})
391 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
392 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
393 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
395 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
398 desired: map[string]int{"default": 2},
400 shouldPull: slots{0},
401 expectBlockState: &balancedBlockState{
407 desired: map[string]int{"default": 2},
408 current: slots{0, 1},
410 expectBlockState: &balancedBlockState{
415 desired: map[string]int{"default": 2},
416 current: slots{0, 1, 2},
417 shouldTrash: slots{2},
418 expectBlockState: &balancedBlockState{
424 desired: map[string]int{"default": 3},
425 current: slots{0, 2, 3, 4},
426 shouldPull: slots{1},
427 shouldTrash: slots{4},
428 expectBlockState: &balancedBlockState{
435 desired: map[string]int{"default": 3},
436 current: slots{0, 1, 2, 3, 4},
437 shouldTrash: slots{2, 3, 4},
438 expectBlockState: &balancedBlockState{
444 desired: map[string]int{"default": 4},
445 current: slots{0, 1, 2, 3, 4},
446 shouldTrash: slots{3, 4},
447 expectBlockState: &balancedBlockState{
451 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
454 desired: map[string]int{"default": 2},
456 expectBlockState: &balancedBlockState{
461 desired: map[string]int{"default": 3},
463 shouldPull: slots{1},
464 expectBlockState: &balancedBlockState{
470 desired: map[string]int{"default": 4},
472 shouldPull: slots{1, 2},
473 expectBlockState: &balancedBlockState{
479 desired: map[string]int{"default": 4},
481 shouldPull: slots{0, 1},
482 expectBlockState: &balancedBlockState{
488 desired: map[string]int{"default": 4},
490 shouldPull: slots{0, 1, 2},
491 expectBlockState: &balancedBlockState{
497 desired: map[string]int{"default": 2},
498 current: slots{1, 2, 3, 4},
499 shouldPull: slots{0},
500 shouldTrash: slots{3, 4},
501 expectBlockState: &balancedBlockState{
508 desired: map[string]int{"default": 2},
509 current: slots{0, 1, 2},
510 shouldTrash: slots{1, 2},
511 expectBlockState: &balancedBlockState{
517 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
518 dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID
519 bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID
520 bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID
521 // block 0 belongs on servers 3 and e, which have different
525 desired: map[string]int{"default": 2},
527 shouldPull: slots{0}})
528 // block 1 belongs on servers 0 and 9, which both report
529 // having a replica, but the replicas are on the same volume
530 // -- so we should pull to the third position (7).
533 desired: map[string]int{"default": 2},
534 current: slots{0, 1},
535 shouldPull: slots{2}})
536 // block 1 can be pulled to the doubly-mounted volume, but the
537 // pull should only be done on the first of the two servers.
540 desired: map[string]int{"default": 2},
542 shouldPull: slots{0}})
543 // block 0 has one replica on a single volume mounted on two
544 // servers (e,9 at positions 1,9). Trashing the replica on 9
545 // would lose the block.
548 desired: map[string]int{"default": 2},
549 current: slots{1, 9},
550 shouldPull: slots{0},
551 expectBlockState: &balancedBlockState{
555 // block 0 is overreplicated, but the second and third
556 // replicas are the same replica according to volume UUID
557 // (despite different Mtimes). Don't trash the third replica.
560 desired: map[string]int{"default": 2},
561 current: slots{0, 1, 9},
562 expectBlockState: &balancedBlockState{
565 // block 0 is overreplicated; the third and fifth replicas are
566 // extra, but the fourth is another view of the second and
567 // shouldn't be trashed.
570 desired: map[string]int{"default": 2},
571 current: slots{0, 1, 5, 9, 12},
572 shouldTrash: slots{5, 12},
573 expectBlockState: &balancedBlockState{
579 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
580 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
581 // probe order. For these tests we give it two mounts, one
582 // with classes=[special], one with
583 // classes=[special,special2].
584 bal.srvs[9].mounts = []*KeepMount{{
585 KeepMount: arvados.KeepMount{
587 StorageClasses: map[string]bool{"special": true},
588 UUID: "zzzzz-mount-special00000009",
589 DeviceID: "9-special",
591 KeepService: bal.srvs[9],
593 KeepMount: arvados.KeepMount{
595 StorageClasses: map[string]bool{"special": true, "special2": true},
596 UUID: "zzzzz-mount-special20000009",
597 DeviceID: "9-special-and-special2",
599 KeepService: bal.srvs[9],
601 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
602 // probe order. We give it two mounts, one with
603 // classes=[special2], one with classes=[default].
604 bal.srvs[13].mounts = []*KeepMount{{
605 KeepMount: arvados.KeepMount{
607 StorageClasses: map[string]bool{"special2": true},
608 UUID: "zzzzz-mount-special2000000d",
609 DeviceID: "13-special2",
611 KeepService: bal.srvs[13],
613 KeepMount: arvados.KeepMount{
615 StorageClasses: map[string]bool{"default": true},
616 UUID: "zzzzz-mount-00000000000000d",
617 DeviceID: "13-default",
619 KeepService: bal.srvs[13],
621 // Pull to slot 9 because that's the only server with the
622 // desired class "special".
625 desired: map[string]int{"default": 2, "special": 1},
626 current: slots{0, 1},
627 shouldPull: slots{9},
628 shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
629 // If some storage classes are not satisfied, don't trash any
630 // excess replicas. (E.g., if someone desires repl=1 on
631 // class=durable, and we have two copies on class=volatile, we
632 // should wait for pull to succeed before trashing anything).
635 desired: map[string]int{"special": 1},
636 current: slots{0, 1},
637 shouldPull: slots{9},
638 shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
639 // Once storage classes are satisfied, trash excess replicas
640 // that appear earlier in probe order but aren't needed to
641 // satisfy the desired classes.
644 desired: map[string]int{"special": 1},
645 current: slots{0, 1, 9},
646 shouldTrash: slots{0, 1}})
647 // Pull to slot 5, the best server with class "special2".
650 desired: map[string]int{"special2": 1},
651 current: slots{0, 1},
652 shouldPull: slots{5},
653 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
654 // Pull to slot 5 and 9 to get replication 2 in desired class
658 desired: map[string]int{"special2": 2},
659 current: slots{0, 1},
660 shouldPull: slots{5, 9},
661 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
662 // Slot 0 has a replica in "default", slot 1 has a replica
663 // in "special"; we need another replica in "default", i.e.,
667 desired: map[string]int{"default": 2, "special": 1},
668 current: slots{0, 1},
669 shouldPull: slots{2}})
670 // Pull to best probe position 0 (despite wrong storage class)
671 // if it's impossible to achieve desired replication in the
672 // desired class (only slots 1 and 3 have special2).
675 desired: map[string]int{"special2": 3},
677 shouldPull: slots{0, 1}})
678 // Trash excess replica.
681 desired: map[string]int{"special": 1},
682 current: slots{0, 1},
683 shouldTrash: slots{1}})
684 // Leave one copy on slot 1 because slot 0 (server 9) only
688 desired: map[string]int{"special": 2},
689 current: slots{0, 1}})
692 // Clear all servers' changesets, balance a single block, and verify
693 // the appropriate changes for that block have been added to the
695 func (bal *balancerSuite) try(c *check.C, t tester) {
696 bal.setupLookupTables()
698 Replicas: bal.replList(t.known, t.current),
701 for i, t := range t.timestamps {
702 blk.Replicas[i].Mtime = t
704 for _, srv := range bal.srvs {
705 srv.ChangeSet = &ChangeSet{}
707 result := bal.balanceBlock(knownBlkid(t.known), blk)
709 var didPull, didTrash slots
710 var didPullMounts, didTrashMounts []string
711 for i, srv := range bal.srvs {
713 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
718 for _, pull := range srv.Pulls {
719 didPull = append(didPull, slot)
720 didPullMounts = append(didPullMounts, pull.To.UUID)
721 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
723 for _, trash := range srv.Trashes {
724 didTrash = append(didTrash, slot)
725 didTrashMounts = append(didTrashMounts, trash.From.UUID)
726 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
730 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
731 sort.Sort(sort.IntSlice(list))
733 c.Check(didPull, check.DeepEquals, t.shouldPull)
734 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
735 if t.shouldPullMounts != nil {
736 sort.Strings(didPullMounts)
737 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
739 if t.shouldTrashMounts != nil {
740 sort.Strings(didTrashMounts)
741 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
743 if t.expectBlockState != nil {
744 c.Check(result.blockState, check.Equals, *t.expectBlockState)
746 if t.expectClassState != nil {
747 c.Check(result.classState, check.DeepEquals, t.expectClassState)
751 // srvList returns the KeepServices, sorted in rendezvous order and
752 // then selected by the slot indexes in order. For example,
753 // srvList(3, slots{0, 1, 4}) returns the first-, second-, and
754 // fifth-best servers for storing knownBlkid(3).
755 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
756 for _, i := range order {
757 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
762 // replList is like srvList but returns an "existing replicas" slice,
763 // suitable for a BlockState test fixture.
764 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
765 nextMnt := map[*KeepService]int{}
766 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
767 for _, srv := range bal.srvList(knownBlockID, order) {
768 // round-robin repls onto each srv's mounts
770 nextMnt[srv] = (n + 1) % len(srv.mounts)
772 repls = append(repls, Replica{srv.mounts[n], mtime})
778 // knownBlkid generates the same data hashes that are tested in
779 // sdk/go/keepclient/root_sorter_test.go
780 func knownBlkid(i int) arvados.SizedDigest {
781 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))