// Copyright (C) The Arvados Authors. All rights reserved.
//
// SPDX-License-Identifier: AGPL-3.0
15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 "git.arvados.org/arvados.git/sdk/go/ctxlog"
17 check "gopkg.in/check.v1"
// Gocheck boilerplate: hook the gocheck test suites into "go test".
func Test(t *testing.T) {
	check.TestingT(t)
}

var _ = check.Suite(&balancerSuite{})
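// Registering the suite means every (*balancerSuite) method named
// Test* below runs when "go test" invokes the Test function above.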
type balancerSuite struct {
	Balancer
	srvs            []*KeepService
	blks            map[string]tester
	knownRendezvous [][]int
	signatureTTL    int64
}

type slots []int

// tester describes one balanceBlock scenario: which known block to
// balance, the desired replication per storage class, where replicas
// currently are, and the changes the balancer is expected to schedule.
type tester struct {
	known      int // index into knownRendezvous
	desired    map[string]int
	current    slots
	timestamps []int64

	shouldPull  slots
	shouldTrash slots

	shouldPullMounts  []string
	shouldTrashMounts []string

	expectBlockState *balancedBlockState
	expectClassState map[string]balancedBlockState
}
func (bal *balancerSuite) SetUpSuite(c *check.C) {
	bal.knownRendezvous = nil
	for _, str := range []string{
		// one hex string per known block, giving that block's
		// rendezvous probe order
	} {
		var slots []int
		for _, c := range []byte(str) {
			pos, _ := strconv.ParseUint(string(c), 16, 4)
			slots = append(slots, int(pos))
		}
		bal.knownRendezvous = append(bal.knownRendezvous, slots)
	}

	bal.signatureTTL = 3600
	bal.Logger = ctxlog.TestLogger(c)
}
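// A probe-order string is decoded one hex digit at a time into server
// indexes: for example, the tests below note that block 0's rendezvous
// order starts 3, e, a and block 1's starts 0, 9, 7, so "slot N" in a
// test case always means the N-th best server for that block.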
func (bal *balancerSuite) SetUpTest(c *check.C) {
	bal.srvs = make([]*KeepService, 16)
	bal.KeepServices = make(map[string]*KeepService)
	for i := range bal.srvs {
		srv := &KeepService{
			KeepService: arvados.KeepService{
				UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
			},
		}
		srv.mounts = []*KeepMount{{
			KeepMount: arvados.KeepMount{
				UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
			},
			KeepService: srv,
		}}
		bal.srvs[i] = srv
		bal.KeepServices[srv.UUID] = srv
	}

	bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
}
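// Each test case below is a tester fixture handed to bal.try (defined
// near the end of this file): "current" lists the probe-order slots
// that already hold replicas, and shouldPull/shouldTrash list the
// slots where the balancer is expected to schedule changes. For
// example, with desired {"default": 2} and replicas in slots 2 and 0,
// the balancer should pull to slot 1 (see TestFixUnbalanced).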
func (bal *balancerSuite) TestPerfect(c *check.C) {
		desired: map[string]int{"default": 2},
		current: slots{0, 1},
		expectBlockState: &balancedBlockState{

func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
		desired: map[string]int{"default": 2},
		current: slots{0, 2, 1},
		shouldTrash: slots{2},
		expectBlockState: &balancedBlockState{

func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
		desired: map[string]int{"default": 0},
		current: slots{0, 1, 3},
		shouldTrash: slots{0, 1, 3},
		expectBlockState: &balancedBlockState{

func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
		desired: map[string]int{"default": 4},
		current: slots{0, 1},
		shouldPull: slots{2, 3},
		expectBlockState: &balancedBlockState{
func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
	bal.srvList(0, slots{3})[0].ReadOnly = true
		desired: map[string]int{"default": 4},
		current: slots{0, 1},
		shouldPull: slots{2, 4},
		expectBlockState: &balancedBlockState{
func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
	bal.testMultipleViews(c, true)
}

func (bal *balancerSuite) TestMultipleViews(c *check.C) {
	bal.testMultipleViews(c, false)
}

func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
	for i, srv := range bal.srvs {
		// Add a second mount to each service; it shares its device
		// with the next service's first mount.
		srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
		srv.mounts = append(srv.mounts, &KeepMount{
			KeepMount: arvados.KeepMount{
				DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
				UUID:     fmt.Sprintf("zzzzz-mount-%015x", i<<16),
	for i := 1; i < len(bal.srvs); i++ {
		// Timestamps are all different, but one of the mounts on
		// srv[4] has the same device ID as the mount where the
		// non-deletable replica is stored on srv[3], so only one
		// replica is safe to trash.
			desired: map[string]int{"default": 1},
			current: slots{0, i, i},
			shouldTrash: slots{i}})

		// Timestamps are all different, and the third replica can't
		// be trashed because it's on a read-only mount, so the first
		// two replicas should be trashed.
			desired: map[string]int{"default": 1},
			current: slots{0, i, i},
			shouldTrash: slots{0, i}})

		// Timestamps are all different, so both replicas on the
		// non-optimal server should be trashed.
			desired: map[string]int{"default": 1},
			current: slots{0, i, i},
			shouldTrash: slots{i, i}})

		// If the three replicas have identical timestamps, none of
		// them can be trashed safely.
			desired: map[string]int{"default": 1},
			current: slots{0, i, i},
			timestamps: []int64{12345678, 12345678, 12345678}})

		// If the first and third replicas have identical timestamps,
		// only the second replica should be trashed.
			desired: map[string]int{"default": 1},
			current: slots{0, i, i},
			timestamps: []int64{12345678, 12345679, 12345678},
			shouldTrash: slots{i}})
func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
		desired: map[string]int{"default": 2},
		current: slots{2, 0},
		shouldPull: slots{1}})

		desired: map[string]int{"default": 2},
		current: slots{2, 7},
		shouldPull: slots{0, 1}})

	// if only one of the pulls succeeds, we'll see this next:
		desired: map[string]int{"default": 2},
		current: slots{2, 1, 7},
		shouldPull: slots{0},
		shouldTrash: slots{7}})

	// if both pulls succeed, we'll see this next:
		desired: map[string]int{"default": 2},
		current: slots{2, 0, 1, 7},
		shouldTrash: slots{2, 7}})

	// unbalanced + excessive replication => pull + trash
		desired: map[string]int{"default": 2},
		current: slots{2, 5, 7},
		shouldPull: slots{0, 1},
		shouldTrash: slots{7}})
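	// The pattern above holds generally: pulls are directed at the
	// best-ranked empty slots, and excess replicas are trashed from
	// the worst-ranked slots, but only once the desired replication
	// is already covered by better-ranked copies.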
func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
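	// Give every server three extra copies of its first mount, so a
	// single server can hold several distinct replicas of one block;
	// a repeated slot number in "current" below refers to replicas
	// on those sibling mounts.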
	for s, srv := range bal.srvs {
		for i := 0; i < 3; i++ {
			m := *(srv.mounts[0])
			m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
			srv.mounts = append(srv.mounts, &m)

		desired: map[string]int{"default": 2},
		current: slots{0, 0},
		shouldPull: slots{1}})

		desired: map[string]int{"default": 2},
		current: slots{2, 2},
		shouldPull: slots{0, 1}})

		desired: map[string]int{"default": 2},
		current: slots{0, 0, 1},
		shouldTrash: slots{0}})

		desired: map[string]int{"default": 2},
		current: slots{1, 1, 0},
		shouldTrash: slots{1}})

		desired: map[string]int{"default": 2},
		current: slots{1, 0, 1, 0, 2},
		shouldTrash: slots{0, 1, 2}})

		desired: map[string]int{"default": 2},
		current: slots{1, 1, 1, 0, 2},
		shouldTrash: slots{1, 1, 2}})

		desired: map[string]int{"default": 2},
		current: slots{1, 1, 2},
		shouldPull: slots{0},
		shouldTrash: slots{1}})

		desired: map[string]int{"default": 2},
		current: slots{1, 1, 0},
		timestamps: []int64{12345678, 12345678, 12345679},

		desired: map[string]int{"default": 2},
		current: slots{1, 1},
		shouldPull: slots{0}})
func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
	// For purposes of increasing replication, we assume replicas
	// with identical timestamps are distinct.
		desired: map[string]int{"default": 4},
		current: slots{0, 1},
		timestamps: []int64{12345678, 12345678},
		shouldPull: slots{2, 3}})

func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
	// For purposes of decreasing replication, we assume replicas
	// with identical timestamps are NOT distinct.
		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		timestamps: []int64{12345678, 12345678, 12345678}})

		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		timestamps: []int64{12345678, 10000000, 10000000}})
func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
	oldTime := bal.MinMtime - 3600
	newTime := bal.MinMtime + 3600
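	// Replicas with mtime newer than bal.MinMtime are still within
	// the signature TTL and must not be trashed; oldTime is safely
	// past that cutoff, newTime is not.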
	// The excess replica is too new to delete.
		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		timestamps: []int64{oldTime, newTime, newTime + 1},
		expectBlockState: &balancedBlockState{

	// The best replicas are too new to delete, but the excess
	// replica is old enough.
		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		timestamps: []int64{newTime, newTime + 1, oldTime},
		shouldTrash: slots{2}})
func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
	bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
	bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
	bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
	c.Check(len(bal.srvs[3].mounts), check.Equals, 1)

	c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
		desired: map[string]int{"default": 2},
		shouldPull: slots{2}})
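	// The pull above compensates for server 3 ending up with no
	// usable mounts: its only mount is read-only and shares its
	// device with server 14's writable mount, so it is redundant.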
func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
	bal.srvs[0].mounts[0].KeepMount.Replication = 2  // srv 0
	bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
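	// A replica stored on either of these mounts counts twice toward
	// the desired replication, so fewer distinct copies are needed
	// whenever srv 0 or srv e holds one of them.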
	// block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
		desired: map[string]int{"default": 2},
		shouldPull: slots{0},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 2},
		current: slots{0, 1},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		shouldTrash: slots{2},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 3},
		current: slots{0, 2, 3, 4},
		shouldPull: slots{1},
		shouldTrash: slots{4},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 3},
		current: slots{0, 1, 2, 3, 4},
		shouldTrash: slots{2, 3, 4},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 4},
		current: slots{0, 1, 2, 3, 4},
		shouldTrash: slots{3, 4},
		expectBlockState: &balancedBlockState{

	// block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
		desired: map[string]int{"default": 2},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 3},
		shouldPull: slots{1},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 4},
		shouldPull: slots{1, 2},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 4},
		shouldPull: slots{0, 1},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 4},
		shouldPull: slots{0, 1, 2},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 2},
		current: slots{1, 2, 3, 4},
		shouldPull: slots{0},
		shouldTrash: slots{3, 4},
		expectBlockState: &balancedBlockState{

		desired: map[string]int{"default": 2},
		current: slots{0, 1, 2},
		shouldTrash: slots{1, 2},
		expectBlockState: &balancedBlockState{
func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
	bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
	bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
	bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
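	// All three servers now advertise the same device, so replicas
	// they report for a given block are really one stored copy: the
	// balancer must count them once and must never trash the only
	// physical copy.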
	// block 0 belongs on servers 3 and e, which have different
	// device IDs.
		desired: map[string]int{"default": 2},
		shouldPull: slots{0}})

	// block 1 belongs on servers 0 and 9, which both report
	// having a replica, but the replicas are on the same device
	// ID -- so we should pull to the third position (7).
		desired: map[string]int{"default": 2},
		current: slots{0, 1},
		shouldPull: slots{2}})

	// block 1 can be pulled to the doubly-mounted device, but the
	// pull should only be done on the first of the two servers.
		desired: map[string]int{"default": 2},
		shouldPull: slots{0}})

	// block 0 has one replica on a single device mounted on two
	// servers (e,9 at positions 1,9). Trashing the replica on 9
	// would lose the block.
		desired: map[string]int{"default": 2},
		current: slots{1, 9},
		shouldPull: slots{0},
		expectBlockState: &balancedBlockState{

	// block 0 is overreplicated, but the second and third
	// replicas are the same replica according to DeviceID
	// (despite different Mtimes). Don't trash the third replica.
		desired: map[string]int{"default": 2},
		current: slots{0, 1, 9},
		expectBlockState: &balancedBlockState{

	// block 0 is overreplicated; the third and fifth replicas are
	// extra, but the fourth is another view of the second and
	// shouldn't be trashed.
		desired: map[string]int{"default": 2},
		current: slots{0, 1, 5, 9, 12},
		shouldTrash: slots{5, 12},
		expectBlockState: &balancedBlockState{
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
	// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
	// probe order. For these tests we give it two mounts, one
	// with classes=[special], one with
	// classes=[special,special2].
	bal.srvs[9].mounts = []*KeepMount{{
		KeepMount: arvados.KeepMount{
			StorageClasses: map[string]bool{"special": true},
			UUID:           "zzzzz-mount-special00000009",
			DeviceID:       "9-special",
		KeepService: bal.srvs[9],
		KeepMount: arvados.KeepMount{
			StorageClasses: map[string]bool{"special": true, "special2": true},
			UUID:           "zzzzz-mount-special20000009",
			DeviceID:       "9-special-and-special2",
		KeepService: bal.srvs[9],
	// For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
	// probe order. We give it two mounts, one with
	// classes=[special2], one with classes=[default].
	bal.srvs[13].mounts = []*KeepMount{{
		KeepMount: arvados.KeepMount{
			StorageClasses: map[string]bool{"special2": true},
			UUID:           "zzzzz-mount-special2000000d",
			DeviceID:       "13-special2",
		KeepService: bal.srvs[13],
		KeepMount: arvados.KeepMount{
			StorageClasses: map[string]bool{"default": true},
			UUID:           "zzzzz-mount-00000000000000d",
			DeviceID:       "13-default",
		KeepService: bal.srvs[13],

	// Pull to slot 9 because that's the only server with the
	// desired class "special".
		desired: map[string]int{"default": 2, "special": 1},
		current: slots{0, 1},
		shouldPull: slots{9},
		shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
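	// Note shouldPullMounts pins down not just the slot but the
	// specific mount UUID the pull is addressed to, which matters
	// here because server 9 has two mounts with different classes.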
	// If some storage classes are not satisfied, don't trash any
	// excess replicas. (E.g., if someone desires repl=1 on
	// class=durable, and we have two copies on class=volatile, we
	// should wait for pull to succeed before trashing anything).
		desired: map[string]int{"special": 1},
		current: slots{0, 1},
		shouldPull: slots{9},
		shouldPullMounts: []string{"zzzzz-mount-special00000009"}})

	// Once storage classes are satisfied, trash excess replicas
	// that appear earlier in probe order but aren't needed to
	// satisfy the desired classes.
		desired: map[string]int{"special": 1},
		current: slots{0, 1, 9},
		shouldTrash: slots{0, 1}})

	// Pull to slot 5, the best server with class "special2".
		desired: map[string]int{"special2": 1},
		current: slots{0, 1},
		shouldPull: slots{5},
		shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})

	// Pull to slots 5 and 9 to get replication 2 in the desired
	// class "special2".
		desired: map[string]int{"special2": 2},
		current: slots{0, 1},
		shouldPull: slots{5, 9},
		shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})

	// Slot 0 has a replica in "default", slot 1 has a replica
	// in "special"; we need another replica in "default", i.e.,
	// a pull to slot 2.
		desired: map[string]int{"default": 2, "special": 1},
		current: slots{0, 1},
		shouldPull: slots{2}})

	// Pull to best probe position 0 (despite wrong storage class)
	// if it's impossible to achieve desired replication in the
	// desired class (only slots 1 and 3 have special2).
		desired: map[string]int{"special2": 3},
		shouldPull: slots{0, 1}})

	// Trash excess replica.
		desired: map[string]int{"special": 1},
		current: slots{0, 1},
		shouldTrash: slots{1}})
	// Leave one copy on slot 1 because slot 0 (server 9) only
	// provides one replica in class "special": the class is still
	// unsatisfied, so nothing should be trashed.
		desired: map[string]int{"special": 2},
		current: slots{0, 1}})

// Clear all servers' changesets, balance a single block, and verify
// the appropriate changes for that block have been added to the
// changesets.
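// Pulled and trashed slots are compared against shouldPull and
// shouldTrash order-insensitively; mount UUIDs and the returned
// block/class state are only checked when the corresponding
// expectations are set in the tester fixture.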
func (bal *balancerSuite) try(c *check.C, t tester) {
	bal.setupLookupTables()
	blk := &BlockState{
		Replicas: bal.replList(t.known, t.current),
		Desired:  t.desired,
	}
	for i, t := range t.timestamps {
		blk.Replicas[i].Mtime = t
	}
	for _, srv := range bal.srvs {
		srv.ChangeSet = &ChangeSet{}
	}
	result := bal.balanceBlock(knownBlkid(t.known), blk)

	var didPull, didTrash slots
	var didPullMounts, didTrashMounts []string
	for i, srv := range bal.srvs {
		var slot int
		for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
			if srvNum == i {
				slot = probeOrder
			}
		}
		for _, pull := range srv.Pulls {
			didPull = append(didPull, slot)
			didPullMounts = append(didPullMounts, pull.To.UUID)
			c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
		}
		for _, trash := range srv.Trashes {
			didTrash = append(didTrash, slot)
			didTrashMounts = append(didTrashMounts, trash.From.UUID)
			c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
		}
	}
	for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
		sort.Sort(sort.IntSlice(list))
	}
	c.Check(didPull, check.DeepEquals, t.shouldPull)
	c.Check(didTrash, check.DeepEquals, t.shouldTrash)
	if t.shouldPullMounts != nil {
		sort.Strings(didPullMounts)
		c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
	}
	if t.shouldTrashMounts != nil {
		sort.Strings(didTrashMounts)
		c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
	}
	if t.expectBlockState != nil {
		c.Check(result.blockState, check.Equals, *t.expectBlockState)
	}
	if t.expectClassState != nil {
		c.Check(result.classState, check.DeepEquals, t.expectClassState)
	}
}
// srvList returns the KeepServices, sorted in rendezvous order and
// then selected by the given slots. For example, srvList(3, slots{0, 1, 4})
// returns the first-, second-, and fifth-best servers for storing
// knownBlkid(3).
func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
	for _, i := range order {
		srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
	}
	return
}

// replList is like srvList but returns an "existing replicas" slice,
// suitable for a BlockState test fixture.
func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
	nextMnt := map[*KeepService]int{}
	mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
	for _, srv := range bal.srvList(knownBlockID, order) {
		// round-robin repls onto each srv's mounts
		n := nextMnt[srv]
		nextMnt[srv] = (n + 1) % len(srv.mounts)
		repls = append(repls, Replica{srv.mounts[n], mtime})
		mtime++
	}
	return
}

// knownBlkid generates the same data hashes that are tested in
// sdk/go/keepclient/root_sorter_test.go
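// knownBlkid(i) is the md5 digest of the 64-digit zero-padded hex
// expansion of i, with the "+size" suffix required by SizedDigest.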
func knownBlkid(i int) arvados.SizedDigest {
	return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))
}