1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 "git.arvados.org/arvados.git/sdk/go/ctxlog"
17 check "gopkg.in/check.v1"
// Test is the function `go test` invokes for this file.
// NOTE(review): its body is not visible in this excerpt; presumably it
// hands control to gocheck -- confirm.
21 func Test(t *testing.T) {
// Register balancerSuite with gocheck so that its fixture and Test*
// methods are run.
25 var _ = check.Suite(&balancerSuite{})
// balancerSuite is the gocheck fixture for the balancer tests. The
// methods in this file read and write bal.srvs, bal.knownRendezvous
// and bal.signatureTTL on it.
// NOTE(review): lines inside this declaration are elided in this
// excerpt, so the field list shown here is incomplete.
27 type balancerSuite struct {
30 blks map[string]tester
// knownRendezvous[k] lists indexes into bal.srvs in probe order for
// known block k; it is populated by SetUpSuite.
31 knownRendezvous [][]int
36 // index into knownRendezvous
// NOTE(review): the fields below are accessed as t.<field> on a
// `tester` value in try(), so they appear to belong to a separate
// tester type whose header is not visible in this excerpt.
//
// desired maps a storage class name to the wanted replica count.
44 desired map[string]int
// When non-nil, try() compares these against the sorted mount UUIDs
// that pulls/trashes were issued for. Only the observed list is sorted
// there, so these must already be in sorted order.
50 shouldPullMounts []string
51 shouldTrashMounts []string
// When non-nil, try() compares these against the result returned by
// balanceBlock.
53 expectBlockState *balancedBlockState
54 expectClassState map[string]balancedBlockState
// SetUpSuite builds the knownRendezvous table from a list of strings
// and sets the suite-wide signatureTTL and Logger.
57 func (bal *balancerSuite) SetUpSuite(c *check.C) {
58 bal.knownRendezvous = nil
59 for _, str := range []string{
// Each byte of str is parsed as a single hex digit and appended as one
// entry. Note that this loop variable shadows the *check.C parameter
// named c for the duration of the loop.
66 for _, c := range []byte(str) {
// The ParseUint error is discarded here.
67 pos, _ := strconv.ParseUint(string(c), 16, 4)
68 slots = append(slots, int(pos))
70 bal.knownRendezvous = append(bal.knownRendezvous, slots)
// signatureTTL is in seconds: SetUpTest and replList multiply it by
// 1e9 before combining it with UnixNano values.
73 bal.signatureTTL = 3600
74 bal.Logger = ctxlog.TestLogger(c)
// SetUpTest rebuilds the per-test fixture: 16 KeepServices in
// bal.srvs, each given a single mount in storage class "default" and
// registered in bal.KeepServices under its UUID. It also resets
// bal.MinMtime.
77 func (bal *balancerSuite) SetUpTest(c *check.C) {
78 bal.srvs = make([]*KeepService, 16)
79 bal.KeepServices = make(map[string]*KeepService)
80 for i := range bal.srvs {
82 KeepService: arvados.KeepService{
// Service and mount UUIDs both embed the index i as 15 zero-padded
// hex digits.
83 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
86 srv.mounts = []*KeepMount{{
87 KeepMount: arvados.KeepMount{
88 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
89 StorageClasses: map[string]bool{"default": true},
96 bal.KeepServices[srv.UUID] = srv
// MinMtime is expressed in nanoseconds: the current time minus
// signatureTTL seconds.
99 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
// TestPerfect covers a block whose existing replicas (probe-order
// slots 0 and 1) already match the desired replication of 2 in class
// "default".
103 func (bal *balancerSuite) TestPerfect(c *check.C) {
105 desired: map[string]int{"default": 2},
106 current: slots{0, 1},
109 expectBlockState: &balancedBlockState{
// TestDecreaseRepl covers a block with three replicas (slots 0, 2, 1)
// when only 2 are desired: the replica in slot 2 is expected to be
// trashed.
114 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
116 desired: map[string]int{"default": 2},
117 current: slots{0, 2, 1},
118 shouldTrash: slots{2},
119 expectBlockState: &balancedBlockState{
// TestDecreaseReplToZero covers a block whose desired replication is
// 0: all three existing replicas (slots 0, 1, 3) are expected to be
// trashed.
125 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
127 desired: map[string]int{"default": 0},
128 current: slots{0, 1, 3},
129 shouldTrash: slots{0, 1, 3},
130 expectBlockState: &balancedBlockState{
// TestIncreaseRepl covers a block with replicas in slots 0 and 1 when
// 4 are desired: pulls to slots 2 and 3 are expected.
135 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
137 desired: map[string]int{"default": 4},
138 current: slots{0, 1},
139 shouldPull: slots{2, 3},
140 expectBlockState: &balancedBlockState{
// TestSkipReadonly marks the server in probe-order slot 3 of known
// block 0 as ReadOnly, then expects the pulls needed to reach
// replication 4 to go to slots 2 and 4 instead of 2 and 3.
146 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
147 bal.srvList(0, slots{3})[0].ReadOnly = true
149 desired: map[string]int{"default": 4},
150 current: slots{0, 1},
151 shouldPull: slots{2, 4},
152 expectBlockState: &balancedBlockState{
// TestAllowTrashWhenReadOnly makes the first mount of the slot-3
// server (for known block 0) non-writable but trashable, then checks
// three cases: pulls avoid slot 3, a replica already on slot 3 does
// not stop a pull to slot 1, and an excess replica on slot 3 is
// trashed.
158 func (bal *balancerSuite) TestAllowTrashWhenReadOnly(c *check.C) {
159 srvs := bal.srvList(0, slots{3})
160 srvs[0].mounts[0].KeepMount.AllowWrite = false
161 srvs[0].mounts[0].KeepMount.AllowTrash = true
162 // can't pull to slot 3, so pull to slot 4 instead
164 desired: map[string]int{"default": 4},
165 current: slots{0, 1},
166 shouldPull: slots{2, 4},
167 expectBlockState: &balancedBlockState{
171 // expect to be able to trash slot 3 in future, so pull to
174 desired: map[string]int{"default": 2},
175 current: slots{0, 3},
176 shouldPull: slots{1},
177 expectBlockState: &balancedBlockState{
181 // trash excess from slot 3
183 desired: map[string]int{"default": 2},
184 current: slots{0, 1, 3},
185 shouldTrash: slots{3},
186 expectBlockState: &balancedBlockState{
// TestMultipleViewsReadOnly runs testMultipleViews with
// allowWrite=false and allowTrash=false for the added mounts.
192 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
193 bal.testMultipleViews(c, false, false)
// TestMultipleViewsReadOnlyAllowTrash runs testMultipleViews with
// allowWrite=false and allowTrash=true for the added mounts.
196 func (bal *balancerSuite) TestMultipleViewsReadOnlyAllowTrash(c *check.C) {
197 bal.testMultipleViews(c, false, true)
// TestMultipleViews runs testMultipleViews with allowWrite=true and
// allowTrash=true for the added mounts.
200 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
201 bal.testMultipleViews(c, true, true)
// testMultipleViews gives every service a second mount that reuses the
// UUID and DeviceID of the next service's first mount (wrapping around
// at the end of bal.srvs), with the given allowWrite/allowTrash
// settings. Then, for each i from 1 to len(bal.srvs)-1, it checks
// which replicas are trashed when replicas sit in slots 0, i and i,
// and only one is desired.
204 func (bal *balancerSuite) testMultipleViews(c *check.C, allowWrite, allowTrash bool) {
205 for i, srv := range bal.srvs {
206 // Add a mount to each service
207 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
208 srv.mounts = append(srv.mounts, &KeepMount{
209 KeepMount: arvados.KeepMount{
// NOTE(review): the DeviceID copied here is read from service i+1
// before that service's own first-mount DeviceID has been assigned by
// the statement at the top of this loop (except on the last iteration,
// which wraps around to service 0) -- confirm this is intended.
210 DeviceID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID,
211 UUID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID,
212 AllowWrite: allowWrite,
213 AllowTrash: allowTrash,
215 StorageClasses: map[string]bool{"default": true},
220 for i := 1; i < len(bal.srvs); i++ {
// NOTE(review): the condition guarding this first case is not visible
// in this excerpt.
223 // Timestamps are all different, but one of
224 // the mounts on srv[4] has the same device ID
225 // where the non-deletable replica is stored
226 // on srv[3], so only one replica is safe to
229 desired: map[string]int{"default": 1},
230 current: slots{0, i, i},
231 shouldTrash: slots{i}})
232 } else if !allowTrash {
233 // Timestamps are all different, and the third
234 // replica can't be trashed because it's on a
235 // read-only mount (with
236 // AllowTrashWhenReadOnly=false), so the first
237 // two replicas should be trashed.
239 desired: map[string]int{"default": 1},
240 current: slots{0, i, i},
241 shouldTrash: slots{0, i}})
243 // Timestamps are all different, so both
244 // replicas on the non-optimal server should
247 desired: map[string]int{"default": 1},
248 current: slots{0, i, i},
249 shouldTrash: slots{i, i}})
251 // If the three replicas have identical timestamps,
252 // none of them can be trashed safely.
254 desired: map[string]int{"default": 1},
255 current: slots{0, i, i},
256 timestamps: []int64{12345678, 12345678, 12345678}})
257 // If the first and third replicas have identical
258 // timestamps, only the second replica should be
261 desired: map[string]int{"default": 1},
262 current: slots{0, i, i},
263 timestamps: []int64{12345678, 12345679, 12345678},
264 shouldTrash: slots{i}})
// TestFixUnbalanced covers blocks that have enough (or too many)
// replicas, but not in the earliest probe-order slots: the expected
// result is pulls to the missing early slots and, where listed,
// trashes of later ones.
268 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
270 desired: map[string]int{"default": 2},
271 current: slots{2, 0},
272 shouldPull: slots{1}})
274 desired: map[string]int{"default": 2},
275 current: slots{2, 7},
276 shouldPull: slots{0, 1}})
277 // if only one of the pulls succeeds, we'll see this next:
279 desired: map[string]int{"default": 2},
280 current: slots{2, 1, 7},
281 shouldPull: slots{0},
282 shouldTrash: slots{7}})
283 // if both pulls succeed, we'll see this next:
285 desired: map[string]int{"default": 2},
286 current: slots{2, 0, 1, 7},
287 shouldTrash: slots{2, 7}})
289 // unbalanced + excessive replication => pull + trash
291 desired: map[string]int{"default": 2},
292 current: slots{2, 5, 7},
293 shouldPull: slots{0, 1},
294 shouldTrash: slots{7}})
// TestMultipleReplicasPerService appends three extra mounts to every
// service (copies of its first mount with new UUIDs), then checks
// pull/trash decisions when several replicas of a block sit on the
// same service (repeated slot numbers in `current`).
297 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
298 for s, srv := range bal.srvs {
299 for i := 0; i < 3; i++ {
// Copy the mount struct by value so that each appended mount is a
// distinct object.
300 m := *(srv.mounts[0])
// NOTE(review): for s == 0 this formats the values 0, 1 and 2, which
// are the same UUID strings SetUpTest assigns to the first mounts of
// services 0-2 -- confirm that overlap is intended.
301 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
302 srv.mounts = append(srv.mounts, &m)
306 desired: map[string]int{"default": 2},
307 current: slots{0, 0},
308 shouldPull: slots{1}})
310 desired: map[string]int{"default": 2},
311 current: slots{2, 2},
312 shouldPull: slots{0, 1}})
314 desired: map[string]int{"default": 2},
315 current: slots{0, 0, 1},
316 shouldTrash: slots{0}})
318 desired: map[string]int{"default": 2},
319 current: slots{1, 1, 0},
320 shouldTrash: slots{1}})
322 desired: map[string]int{"default": 2},
323 current: slots{1, 0, 1, 0, 2},
324 shouldTrash: slots{0, 1, 2}})
326 desired: map[string]int{"default": 2},
327 current: slots{1, 1, 1, 0, 2},
328 shouldTrash: slots{1, 1, 2}})
330 desired: map[string]int{"default": 2},
331 current: slots{1, 1, 2},
332 shouldPull: slots{0},
333 shouldTrash: slots{1}})
335 desired: map[string]int{"default": 2},
336 current: slots{1, 1, 0},
337 timestamps: []int64{12345678, 12345678, 12345679},
340 desired: map[string]int{"default": 2},
341 current: slots{1, 1},
342 shouldPull: slots{0}})
// TestIncreaseReplTimestampCollision covers two existing replicas with
// identical timestamps when 4 are desired: pulls to slots 2 and 3 are
// still expected.
345 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
346 // For purposes of increasing replication, we assume identical
347 // replicas are distinct.
349 desired: map[string]int{"default": 4},
350 current: slots{0, 1},
351 timestamps: []int64{12345678, 12345678},
352 shouldPull: slots{2, 3}})
// TestDecreaseReplTimestampCollision covers over-replicated blocks
// where some replicas share a timestamp. Each case lists the replica
// timestamps alongside the slots (and, in the later cases, the mount
// UUIDs) that are expected to be trashed.
355 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
356 // For purposes of decreasing replication, we assume identical
357 // replicas are NOT distinct.
359 desired: map[string]int{"default": 2},
360 current: slots{0, 1, 2},
361 timestamps: []int64{12345678, 12345678, 12345678}})
363 desired: map[string]int{"default": 2},
364 current: slots{0, 1, 2},
365 timestamps: []int64{12345678, 10000000, 10000000}})
// Desired replication 0 with three identical timestamps: only slot 0
// is listed as an expected trash.
367 desired: map[string]int{"default": 0},
368 current: slots{0, 1, 2},
369 timestamps: []int64{12345678, 12345678, 12345678},
370 shouldTrash: slots{0},
371 shouldTrashMounts: []string{
372 bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
374 desired: map[string]int{"default": 2},
375 current: slots{0, 1, 2, 5, 6},
376 timestamps: []int64{12345678, 12345679, 10000000, 10000000, 10000000},
377 shouldTrash: slots{2},
378 shouldTrashMounts: []string{
379 bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
381 desired: map[string]int{"default": 2},
382 current: slots{0, 1, 2, 5, 6},
383 timestamps: []int64{12345678, 12345679, 12345671, 10000000, 10000000},
384 shouldTrash: slots{2, 5},
385 shouldTrashMounts: []string{
386 bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
387 bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
389 desired: map[string]int{"default": 2},
390 current: slots{0, 1, 2, 5, 6},
391 timestamps: []int64{12345678, 12345679, 12345679, 10000000, 10000000},
392 shouldTrash: slots{5},
393 shouldTrashMounts: []string{
394 bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
// TestDecreaseReplBlockTooNew covers over-replicated blocks whose
// replica timestamps fall on either side of bal.MinMtime.
397 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
// bal.MinMtime is a nanosecond value (see SetUpTest), so these offsets
// are 3600 nanoseconds either side of it, not 3600 seconds.
398 oldTime := bal.MinMtime - 3600
399 newTime := bal.MinMtime + 3600
400 // The excess replica is too new to delete.
402 desired: map[string]int{"default": 2},
403 current: slots{0, 1, 2},
404 timestamps: []int64{oldTime, newTime, newTime + 1},
405 expectBlockState: &balancedBlockState{
409 // The best replicas are too new to delete, but the excess
410 // replica is old enough.
412 desired: map[string]int{"default": 2},
413 current: slots{0, 1, 2},
414 timestamps: []int64{newTime, newTime + 1, oldTime},
415 shouldTrash: slots{2}})
// TestCleanupMounts makes srv 3's only mount non-writable and gives
// srv 14's first mount the same UUID and DeviceID, then checks that
// srv 3's mount count goes from 1 to 0.
// NOTE(review): the call made between the two length checks is not
// visible in this excerpt.
418 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
419 bal.srvs[3].mounts[0].KeepMount.AllowWrite = false
420 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
421 bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID
422 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
423 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
425 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
428 desired: map[string]int{"default": 2},
430 shouldPull: slots{2}})
// TestVolumeReplication sets Replication=2 on the first mounts of
// srvs 0 and 14, then checks pull/trash decisions for blocks whose
// probe order puts one of those mounts in an early slot.
// NOTE(review): which known block each case uses, and some `current`
// lists, are on lines not visible in this excerpt.
433 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
434 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
435 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
437 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
440 desired: map[string]int{"default": 2},
442 shouldPull: slots{0},
443 expectBlockState: &balancedBlockState{
449 desired: map[string]int{"default": 2},
450 current: slots{0, 1},
452 expectBlockState: &balancedBlockState{
457 desired: map[string]int{"default": 2},
458 current: slots{0, 1, 2},
459 shouldTrash: slots{2},
460 expectBlockState: &balancedBlockState{
466 desired: map[string]int{"default": 3},
467 current: slots{0, 2, 3, 4},
468 shouldPull: slots{1},
469 shouldTrash: slots{4},
470 expectBlockState: &balancedBlockState{
477 desired: map[string]int{"default": 3},
478 current: slots{0, 1, 2, 3, 4},
479 shouldTrash: slots{2, 3, 4},
480 expectBlockState: &balancedBlockState{
486 desired: map[string]int{"default": 4},
487 current: slots{0, 1, 2, 3, 4},
488 shouldTrash: slots{3, 4},
489 expectBlockState: &balancedBlockState{
493 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
496 desired: map[string]int{"default": 2},
498 expectBlockState: &balancedBlockState{
503 desired: map[string]int{"default": 3},
505 shouldPull: slots{1},
506 expectBlockState: &balancedBlockState{
512 desired: map[string]int{"default": 4},
514 shouldPull: slots{1, 2},
515 expectBlockState: &balancedBlockState{
521 desired: map[string]int{"default": 4},
523 shouldPull: slots{0, 1},
524 expectBlockState: &balancedBlockState{
530 desired: map[string]int{"default": 4},
532 shouldPull: slots{0, 1, 2},
533 expectBlockState: &balancedBlockState{
539 desired: map[string]int{"default": 2},
540 current: slots{1, 2, 3, 4},
541 shouldPull: slots{0},
542 shouldTrash: slots{3, 4},
543 expectBlockState: &balancedBlockState{
550 desired: map[string]int{"default": 2},
551 current: slots{0, 1, 2},
552 shouldTrash: slots{1, 2},
553 expectBlockState: &balancedBlockState{
// TestDeviceRWMountedByMultipleServers gives the first mounts of srvs
// 0, 9 and 14 the same UUID, so those three services present the same
// volume, then checks that replicas seen through different services
// are not counted or trashed as if they were independent copies.
559 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
560 dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID
561 bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID
562 bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID
563 // block 0 belongs on servers 3 and e, which have different
567 desired: map[string]int{"default": 2},
569 shouldPull: slots{0}})
570 // block 1 belongs on servers 0 and 9, which both report
571 // having a replica, but the replicas are on the same volume
572 // -- so we should pull to the third position (7).
575 desired: map[string]int{"default": 2},
576 current: slots{0, 1},
577 shouldPull: slots{2}})
578 // block 1 can be pulled to the doubly-mounted volume, but the
579 // pull should only be done on the first of the two servers.
582 desired: map[string]int{"default": 2},
584 shouldPull: slots{0}})
585 // block 0 has one replica on a single volume mounted on two
586 // servers (e,9 at positions 1,9). Trashing the replica on 9
587 // would lose the block.
590 desired: map[string]int{"default": 2},
591 current: slots{1, 9},
592 shouldPull: slots{0},
593 expectBlockState: &balancedBlockState{
597 // block 0 is overreplicated, but the second and third
598 // replicas are the same replica according to volume UUID
599 // (despite different Mtimes). Don't trash the third replica.
602 desired: map[string]int{"default": 2},
603 current: slots{0, 1, 9},
604 expectBlockState: &balancedBlockState{
607 // block 0 is overreplicated; the third and fifth replicas are
608 // extra, but the fourth is another view of the second and
609 // shouldn't be trashed.
612 desired: map[string]int{"default": 2},
613 current: slots{0, 1, 5, 9, 12},
614 shouldTrash: slots{5, 12},
615 expectBlockState: &balancedBlockState{
// TestChangeStorageClasses replaces the mounts of srvs 9 and 13 with
// mounts in the storage classes "special", "special2" and "default",
// then checks that pulls and trashes are chosen to satisfy the desired
// per-class replication.
621 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
622 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
623 // probe order. For these tests we give it two mounts, one
624 // with classes=[special], one with
625 // classes=[special,special2].
626 bal.srvs[9].mounts = []*KeepMount{{
627 KeepMount: arvados.KeepMount{
631 StorageClasses: map[string]bool{"special": true},
632 UUID: "zzzzz-mount-special00000009",
633 DeviceID: "9-special",
635 KeepService: bal.srvs[9],
637 KeepMount: arvados.KeepMount{
641 StorageClasses: map[string]bool{"special": true, "special2": true},
642 UUID: "zzzzz-mount-special20000009",
643 DeviceID: "9-special-and-special2",
645 KeepService: bal.srvs[9],
647 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
648 // probe order. We give it two mounts, one with
649 // classes=[special2], one with classes=[default].
650 bal.srvs[13].mounts = []*KeepMount{{
651 KeepMount: arvados.KeepMount{
655 StorageClasses: map[string]bool{"special2": true},
656 UUID: "zzzzz-mount-special2000000d",
657 DeviceID: "13-special2",
659 KeepService: bal.srvs[13],
661 KeepMount: arvados.KeepMount{
665 StorageClasses: map[string]bool{"default": true},
666 UUID: "zzzzz-mount-00000000000000d",
667 DeviceID: "13-default",
669 KeepService: bal.srvs[13],
671 // Pull to slot 9 because that's the only server with the
672 // desired class "special".
675 desired: map[string]int{"default": 2, "special": 1},
676 current: slots{0, 1},
677 shouldPull: slots{9},
678 shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
679 // If some storage classes are not satisfied, don't trash any
680 // excess replicas. (E.g., if someone desires repl=1 on
681 // class=durable, and we have two copies on class=volatile, we
682 // should wait for pull to succeed before trashing anything).
685 desired: map[string]int{"special": 1},
686 current: slots{0, 1},
687 shouldPull: slots{9},
688 shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
689 // Once storage classes are satisfied, trash excess replicas
690 // that appear earlier in probe order but aren't needed to
691 // satisfy the desired classes.
694 desired: map[string]int{"special": 1},
695 current: slots{0, 1, 9},
696 shouldTrash: slots{0, 1}})
697 // Pull to slot 5, the best server with class "special2".
700 desired: map[string]int{"special2": 1},
701 current: slots{0, 1},
702 shouldPull: slots{5},
703 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
704 // Pull to slot 5 and 9 to get replication 2 in desired class
708 desired: map[string]int{"special2": 2},
709 current: slots{0, 1},
710 shouldPull: slots{5, 9},
// The expected UUIDs are listed in sorted order because try() sorts
// only the observed list before comparing.
711 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
712 // Slot 0 has a replica in "default", slot 1 has a replica
713 // in "special"; we need another replica in "default", i.e.,
717 desired: map[string]int{"default": 2, "special": 1},
718 current: slots{0, 1},
719 shouldPull: slots{2}})
720 // Pull to best probe position 0 (despite wrong storage class)
721 // if it's impossible to achieve desired replication in the
722 // desired class (only slots 1 and 3 have special2).
725 desired: map[string]int{"special2": 3},
727 shouldPull: slots{0, 1}})
728 // Trash excess replica.
731 desired: map[string]int{"special": 1},
732 current: slots{0, 1},
733 shouldTrash: slots{1}})
734 // Leave one copy on slot 1 because slot 0 (server 9) only
738 desired: map[string]int{"special": 2},
739 current: slots{0, 1}})
742 // Clear all servers' changesets, balance a single block, and verify
743 // the appropriate changes for that block have been added to the
//
// As visible in this excerpt: every srv.ChangeSet is replaced with an
// empty one before balanceBlock runs, and afterwards the pulls and
// trashes found on each server are collected and compared against
// t.shouldPull / t.shouldTrash. The mount-UUID lists and the block and
// class states are compared only when the corresponding tester field
// is non-nil.
//
// Side effect: t.shouldPull and t.shouldTrash are sorted in place, so
// the backing arrays of the caller's slices are reordered.
745 func (bal *balancerSuite) try(c *check.C, t tester) {
746 bal.setupLookupTables()
748 Replicas: bal.replList(t.known, t.current),
// Explicit timestamps, when given, overwrite the Mtime of the replica
// at the same index. The loop variable t shadows the tester parameter
// inside this loop.
751 for i, t := range t.timestamps {
752 blk.Replicas[i].Mtime = t
754 for _, srv := range bal.srvs {
755 srv.ChangeSet = &ChangeSet{}
757 result := bal.balanceBlock(knownBlkid(t.known), blk)
759 var didPull, didTrash slots
760 var didPullMounts, didTrashMounts []string
761 for i, srv := range bal.srvs {
// NOTE(review): the body of this loop, which presumably derives `slot`
// (the probe-order position of server i), is not visible in this
// excerpt.
763 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
768 for _, pull := range srv.Pulls {
769 didPull = append(didPull, slot)
770 didPullMounts = append(didPullMounts, pull.To.UUID)
771 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
773 for _, trash := range srv.Trashes {
774 didTrash = append(didTrash, slot)
775 didTrashMounts = append(didTrashMounts, trash.From.UUID)
776 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
// Sort both observed and expected slot lists so that the comparison
// below ignores ordering.
780 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
781 sort.Sort(sort.IntSlice(list))
783 c.Check(didPull, check.DeepEquals, t.shouldPull)
784 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
// Only the observed UUID lists are sorted; the expected lists must
// already be in sorted order.
785 if t.shouldPullMounts != nil {
786 sort.Strings(didPullMounts)
787 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
789 if t.shouldTrashMounts != nil {
790 sort.Strings(didTrashMounts)
791 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
793 if t.expectBlockState != nil {
794 c.Check(result.blockState, check.Equals, *t.expectBlockState)
796 if t.expectClassState != nil {
797 c.Check(result.classState, check.DeepEquals, t.expectClassState)
801 // srvList returns the KeepServices, sorted in rendezvous order and
802 // then selected by order. For example, srvList(3, slots{0, 1, 4})
803 // returns the first-, second-, and fifth-best servers for storing
804 // knownBlkid(3).
805 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
806 for _, i := range order {
// knownRendezvous[knownBlockID][i] is the index into bal.srvs of the
// server at probe position i.
807 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
812 // replList is like srvList but returns an "existing replicas" slice,
813 // suitable for a BlockState test fixture.
//
// Every replica gets the same default mtime, which try() may then
// override per replica from tester.timestamps.
814 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
815 nextMnt := map[*KeepService]int{}
// Default mtime: now minus (signatureTTL + 86400) seconds, expressed
// in nanoseconds. That is 86400 s further in the past than the
// MinMtime cutoff computed in SetUpTest.
816 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
817 for _, srv := range bal.srvList(knownBlockID, order) {
818 // round-robin repls onto each srv's mounts
// NOTE(review): the definition of n is not visible in this excerpt;
// presumably it is the current value of nextMnt[srv].
820 nextMnt[srv] = (n + 1) % len(srv.mounts)
822 repls = append(repls, Replica{srv.mounts[n], mtime})
828 // generate the same data hashes that are tested in
829 // sdk/go/keepclient/root_sorter_test.go
//
// knownBlkid formats i as 64 zero-padded hex digits, takes the MD5 of
// that string, and returns the hex digest followed by "+64".
830 func knownBlkid(i int) arvados.SizedDigest {
831 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))