1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
17 check "gopkg.in/check.v1"
21 func Test(t *testing.T) {
25 var _ = check.Suite(&balancerSuite{})
27 type balancerSuite struct {
30 blks map[string]tester
31 knownRendezvous [][]int
36 // index into knownRendezvous
44 desired map[string]int
50 shouldPullMounts []string
51 shouldTrashMounts []string
53 expectResult balanceResult
56 func (bal *balancerSuite) SetUpSuite(c *check.C) {
57 bal.knownRendezvous = nil
58 for _, str := range []string{
65 for _, c := range []byte(str) {
66 pos, _ := strconv.ParseUint(string(c), 16, 4)
67 slots = append(slots, int(pos))
69 bal.knownRendezvous = append(bal.knownRendezvous, slots)
72 bal.signatureTTL = 3600
75 func (bal *balancerSuite) SetUpTest(c *check.C) {
76 bal.srvs = make([]*KeepService, 16)
77 bal.KeepServices = make(map[string]*KeepService)
78 for i := range bal.srvs {
80 KeepService: arvados.KeepService{
81 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
84 srv.mounts = []*KeepMount{{
85 KeepMount: arvados.KeepMount{
86 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
91 bal.KeepServices[srv.UUID] = srv
94 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
98 func (bal *balancerSuite) TestPerfect(c *check.C) {
100 desired: map[string]int{"default": 2},
101 current: slots{0, 1},
106 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
108 desired: map[string]int{"default": 2},
109 current: slots{0, 2, 1},
110 shouldTrash: slots{2}})
113 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
115 desired: map[string]int{"default": 0},
116 current: slots{0, 1, 3},
117 shouldTrash: slots{0, 1, 3}})
120 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
122 desired: map[string]int{"default": 4},
123 current: slots{0, 1},
124 shouldPull: slots{2, 3}})
127 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
128 bal.srvList(0, slots{3})[0].ReadOnly = true
130 desired: map[string]int{"default": 4},
131 current: slots{0, 1},
132 shouldPull: slots{2, 4}})
135 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
136 bal.testMultipleViews(c, true)
139 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
140 bal.testMultipleViews(c, false)
143 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
144 for i, srv := range bal.srvs {
145 // Add a mount to each service
146 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
147 srv.mounts = append(srv.mounts, &KeepMount{
148 KeepMount: arvados.KeepMount{
149 DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
150 UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16),
157 for i := 1; i < len(bal.srvs); i++ {
160 // Timestamps are all different, but one of
161 // the mounts on srv[4] has the same device ID
162 // where the non-deletable replica is stored
163 // on srv[3], so only one replica is safe to
166 desired: map[string]int{"default": 1},
167 current: slots{0, i, i},
168 shouldTrash: slots{i}})
170 // Timestamps are all different, and the third
171 // replica can't be trashed because it's on a
172 // read-only mount, so the first two replicas
173 // should be trashed.
175 desired: map[string]int{"default": 1},
176 current: slots{0, i, i},
177 shouldTrash: slots{0, i}})
179 // Timestamps are all different, so both
180 // replicas on the non-optimal server should
183 desired: map[string]int{"default": 1},
184 current: slots{0, i, i},
185 shouldTrash: slots{i, i}})
187 // If the three replicas have identical timestamps,
188 // none of them can be trashed safely.
190 desired: map[string]int{"default": 1},
191 current: slots{0, i, i},
192 timestamps: []int64{12345678, 12345678, 12345678}})
193 // If the first and third replicas have identical
194 // timestamps, only the second replica should be
197 desired: map[string]int{"default": 1},
198 current: slots{0, i, i},
199 timestamps: []int64{12345678, 12345679, 12345678},
200 shouldTrash: slots{i}})
204 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
206 desired: map[string]int{"default": 2},
207 current: slots{2, 0},
208 shouldPull: slots{1}})
210 desired: map[string]int{"default": 2},
211 current: slots{2, 7},
212 shouldPull: slots{0, 1}})
213 // if only one of the pulls succeeds, we'll see this next:
215 desired: map[string]int{"default": 2},
216 current: slots{2, 1, 7},
217 shouldPull: slots{0},
218 shouldTrash: slots{7}})
219 // if both pulls succeed, we'll see this next:
221 desired: map[string]int{"default": 2},
222 current: slots{2, 0, 1, 7},
223 shouldTrash: slots{2, 7}})
225 // unbalanced + excessive replication => pull + trash
227 desired: map[string]int{"default": 2},
228 current: slots{2, 5, 7},
229 shouldPull: slots{0, 1},
230 shouldTrash: slots{7}})
233 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
234 for s, srv := range bal.srvs {
235 for i := 0; i < 3; i++ {
236 m := *(srv.mounts[0])
237 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
238 srv.mounts = append(srv.mounts, &m)
242 desired: map[string]int{"default": 2},
243 current: slots{0, 0},
244 shouldPull: slots{1}})
246 desired: map[string]int{"default": 2},
247 current: slots{2, 2},
248 shouldPull: slots{0, 1}})
250 desired: map[string]int{"default": 2},
251 current: slots{0, 0, 1},
252 shouldTrash: slots{0}})
254 desired: map[string]int{"default": 2},
255 current: slots{1, 1, 0},
256 shouldTrash: slots{1}})
258 desired: map[string]int{"default": 2},
259 current: slots{1, 0, 1, 0, 2},
260 shouldTrash: slots{0, 1, 2}})
262 desired: map[string]int{"default": 2},
263 current: slots{1, 1, 1, 0, 2},
264 shouldTrash: slots{1, 1, 2}})
266 desired: map[string]int{"default": 2},
267 current: slots{1, 1, 2},
268 shouldPull: slots{0},
269 shouldTrash: slots{1}})
271 desired: map[string]int{"default": 2},
272 current: slots{1, 1, 0},
273 timestamps: []int64{12345678, 12345678, 12345679},
276 desired: map[string]int{"default": 2},
277 current: slots{1, 1},
278 shouldPull: slots{0}})
281 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
282 // For purposes of increasing replication, we assume identical
283 // replicas are distinct.
285 desired: map[string]int{"default": 4},
286 current: slots{0, 1},
287 timestamps: []int64{12345678, 12345678},
288 shouldPull: slots{2, 3}})
291 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
292 // For purposes of decreasing replication, we assume identical
293 // replicas are NOT distinct.
295 desired: map[string]int{"default": 2},
296 current: slots{0, 1, 2},
297 timestamps: []int64{12345678, 12345678, 12345678}})
299 desired: map[string]int{"default": 2},
300 current: slots{0, 1, 2},
301 timestamps: []int64{12345678, 10000000, 10000000}})
304 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
305 oldTime := bal.MinMtime - 3600
306 newTime := bal.MinMtime + 3600
307 // The excess replica is too new to delete.
309 desired: map[string]int{"default": 2},
310 current: slots{0, 1, 2},
311 timestamps: []int64{oldTime, newTime, newTime + 1},
312 expectResult: balanceResult{
315 classState: map[string]balancedBlockState{"default": {
318 unachievable: false}}}})
319 // The best replicas are too new to delete, but the excess
320 // replica is old enough.
322 desired: map[string]int{"default": 2},
323 current: slots{0, 1, 2},
324 timestamps: []int64{newTime, newTime + 1, oldTime},
325 shouldTrash: slots{2}})
328 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
329 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
330 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
331 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
332 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
334 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
337 desired: map[string]int{"default": 2},
339 shouldPull: slots{2}})
342 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
343 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
344 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
346 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
349 desired: map[string]int{"default": 2},
351 shouldPull: slots{0}})
354 desired: map[string]int{"default": 2},
355 current: slots{0, 1},
359 desired: map[string]int{"default": 2},
360 current: slots{0, 1, 2},
361 shouldTrash: slots{2}})
364 desired: map[string]int{"default": 3},
365 current: slots{0, 2, 3, 4},
366 shouldPull: slots{1},
367 shouldTrash: slots{4},
368 expectResult: balanceResult{
371 classState: map[string]balancedBlockState{"default": {
374 unachievable: false}}}})
377 desired: map[string]int{"default": 3},
378 current: slots{0, 1, 2, 3, 4},
379 shouldTrash: slots{2, 3, 4}})
382 desired: map[string]int{"default": 4},
383 current: slots{0, 1, 2, 3, 4},
384 shouldTrash: slots{3, 4},
385 expectResult: balanceResult{
388 classState: map[string]balancedBlockState{"default": {
391 unachievable: false}}}})
392 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
395 desired: map[string]int{"default": 2},
397 expectResult: balanceResult{
400 classState: map[string]balancedBlockState{"default": {
403 unachievable: false}}}})
406 desired: map[string]int{"default": 3},
408 shouldPull: slots{1}})
411 desired: map[string]int{"default": 4},
413 shouldPull: slots{1, 2}})
416 desired: map[string]int{"default": 4},
418 shouldPull: slots{0, 1}})
421 desired: map[string]int{"default": 4},
423 shouldPull: slots{0, 1, 2},
424 expectResult: balanceResult{
427 classState: map[string]balancedBlockState{"default": {
430 unachievable: false}}}})
433 desired: map[string]int{"default": 2},
434 current: slots{1, 2, 3, 4},
435 shouldPull: slots{0},
436 shouldTrash: slots{3, 4}})
439 desired: map[string]int{"default": 2},
440 current: slots{0, 1, 2},
441 shouldTrash: slots{1, 2},
442 expectResult: balanceResult{
445 classState: map[string]balancedBlockState{"default": {
448 unachievable: false}}}})
451 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
452 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
453 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
454 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
455 // block 0 belongs on servers 3 and e, which have different
459 desired: map[string]int{"default": 2},
461 shouldPull: slots{0}})
462 // block 1 belongs on servers 0 and 9, which both report
463 // having a replica, but the replicas are on the same device
464 // ID -- so we should pull to the third position (7).
467 desired: map[string]int{"default": 2},
468 current: slots{0, 1},
469 shouldPull: slots{2}})
470 // block 1 can be pulled to the doubly-mounted device, but the
471 // pull should only be done on the first of the two servers.
474 desired: map[string]int{"default": 2},
476 shouldPull: slots{0}})
477 // block 0 has one replica on a single device mounted on two
478 // servers (e,9 at positions 1,9). Trashing the replica on 9
479 // would lose the block.
482 desired: map[string]int{"default": 2},
483 current: slots{1, 9},
484 shouldPull: slots{0},
485 expectResult: balanceResult{
487 classState: map[string]balancedBlockState{"default": {
490 unachievable: false}}}})
491 // block 0 is overreplicated, but the second and third
492 // replicas are the same replica according to DeviceID
493 // (despite different Mtimes). Don't trash the third replica.
496 desired: map[string]int{"default": 2},
497 current: slots{0, 1, 9},
498 expectResult: balanceResult{
500 classState: map[string]balancedBlockState{"default": {
503 unachievable: false}}}})
504 // block 0 is overreplicated; the third and fifth replicas are
505 // extra, but the fourth is another view of the second and
506 // shouldn't be trashed.
509 desired: map[string]int{"default": 2},
510 current: slots{0, 1, 5, 9, 12},
511 shouldTrash: slots{5, 12},
512 expectResult: balanceResult{
514 classState: map[string]balancedBlockState{"default": {
517 unachievable: false}}}})
520 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
521 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
522 // probe order. For these tests we give it two mounts, one
523 // with classes=[special], one with
524 // classes=[special,special2].
525 bal.srvs[9].mounts = []*KeepMount{{
526 KeepMount: arvados.KeepMount{
528 StorageClasses: []string{"special"},
529 UUID: "zzzzz-mount-special00000009",
530 DeviceID: "9-special",
532 KeepService: bal.srvs[9],
534 KeepMount: arvados.KeepMount{
536 StorageClasses: []string{"special", "special2"},
537 UUID: "zzzzz-mount-special20000009",
538 DeviceID: "9-special-and-special2",
540 KeepService: bal.srvs[9],
542 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
543 // probe order. We give it two mounts, one with
544 // classes=[special2], one with classes=[default].
545 bal.srvs[13].mounts = []*KeepMount{{
546 KeepMount: arvados.KeepMount{
548 StorageClasses: []string{"special2"},
549 UUID: "zzzzz-mount-special2000000d",
550 DeviceID: "13-special2",
552 KeepService: bal.srvs[13],
554 KeepMount: arvados.KeepMount{
556 StorageClasses: []string{"default"},
557 UUID: "zzzzz-mount-00000000000000d",
558 DeviceID: "13-default",
560 KeepService: bal.srvs[13],
562 // Pull to slot 9 because that's the only server with the
563 // desired class "special".
566 desired: map[string]int{"default": 2, "special": 1},
567 current: slots{0, 1},
568 shouldPull: slots{9},
569 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
570 // If some storage classes are not satisfied, don't trash any
571 // excess replicas. (E.g., if someone desires repl=1 on
572 // class=durable, and we have two copies on class=volatile, we
573 // should wait for pull to succeed before trashing anything).
576 desired: map[string]int{"special": 1},
577 current: slots{0, 1},
578 shouldPull: slots{9},
579 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
580 // Once storage classes are satisfied, trash excess replicas
581 // that appear earlier in probe order but aren't needed to
582 // satisfy the desired classes.
585 desired: map[string]int{"special": 1},
586 current: slots{0, 1, 9},
587 shouldTrash: slots{0, 1}})
588 // Pull to slot 5, the best server with class "special2".
591 desired: map[string]int{"special2": 1},
592 current: slots{0, 1},
593 shouldPull: slots{5},
594 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
595 // Pull to slot 5 and 9 to get replication 2 in desired class
599 desired: map[string]int{"special2": 2},
600 current: slots{0, 1},
601 shouldPull: slots{5, 9},
602 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
603 // Slot 0 has a replica in "default", slot 1 has a replica
604 // in "special"; we need another replica in "default", i.e.,
608 desired: map[string]int{"default": 2, "special": 1},
609 current: slots{0, 1},
610 shouldPull: slots{2}})
611 // Pull to best probe position 0 (despite wrong storage class)
612 // if it's impossible to achieve desired replication in the
613 // desired class (only slots 1 and 3 have special2).
616 desired: map[string]int{"special2": 3},
618 shouldPull: slots{0, 1}})
619 // Trash excess replica.
622 desired: map[string]int{"special": 1},
623 current: slots{0, 1},
624 shouldTrash: slots{1}})
625 // Leave one copy on slot 1 because slot 0 (server 9) only
629 desired: map[string]int{"special": 2},
630 current: slots{0, 1}})
633 // Clear all servers' changesets, balance a single block, and verify
634 // the appropriate changes for that block have been added to the
636 func (bal *balancerSuite) try(c *check.C, t tester) {
637 bal.setupLookupTables()
639 Replicas: bal.replList(t.known, t.current),
642 for i, t := range t.timestamps {
643 blk.Replicas[i].Mtime = t
645 for _, srv := range bal.srvs {
646 srv.ChangeSet = &ChangeSet{}
648 result := bal.balanceBlock(knownBlkid(t.known), blk)
650 var didPull, didTrash slots
651 var didPullMounts, didTrashMounts []string
652 for i, srv := range bal.srvs {
654 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
659 for _, pull := range srv.Pulls {
660 didPull = append(didPull, slot)
661 didPullMounts = append(didPullMounts, pull.To.UUID)
662 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
664 for _, trash := range srv.Trashes {
665 didTrash = append(didTrash, slot)
666 didTrashMounts = append(didTrashMounts, trash.From.UUID)
667 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
671 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
672 sort.Sort(sort.IntSlice(list))
674 c.Check(didPull, check.DeepEquals, t.shouldPull)
675 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
676 if t.shouldPullMounts != nil {
677 sort.Strings(didPullMounts)
678 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
680 if t.shouldTrashMounts != nil {
681 sort.Strings(didTrashMounts)
682 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
684 if t.expectResult.have > 0 {
685 c.Check(result.have, check.Equals, t.expectResult.have)
687 if t.expectResult.want > 0 {
688 c.Check(result.want, check.Equals, t.expectResult.want)
690 if t.expectResult.classState != nil {
691 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
695 // srvList returns the KeepServices, sorted in rendezvous order and
696 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
697 // returns the first-, second-, and fifth-best servers for storing
698 // knownBlkid(3).
699 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
700 for _, i := range order {
701 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
706 // replList is like srvList but returns an "existing replicas" slice,
707 // suitable for a BlockState test fixture.
708 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
709 nextMnt := map[*KeepService]int{}
710 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
711 for _, srv := range bal.srvList(knownBlockID, order) {
712 // round-robin repls onto each srv's mounts
714 nextMnt[srv] = (n + 1) % len(srv.mounts)
716 repls = append(repls, Replica{srv.mounts[n], mtime})
722 // knownBlkid generates the same data hashes that are tested in
723 // sdk/go/keepclient/root_sorter_test.go
724 func knownBlkid(i int) arvados.SizedDigest {
725 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))