1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
16 check "gopkg.in/check.v1"
20 func Test(t *testing.T) {
24 var _ = check.Suite(&balancerSuite{})
26 type balancerSuite struct {
29 blks map[string]tester
30 knownRendezvous [][]int
35 // index into knownRendezvous
43 desired map[string]int
49 shouldPullMounts []string
50 shouldTrashMounts []string
52 expectResult balanceResult
55 func (bal *balancerSuite) SetUpSuite(c *check.C) {
56 bal.knownRendezvous = nil
57 for _, str := range []string{
64 for _, c := range []byte(str) {
65 pos, _ := strconv.ParseUint(string(c), 16, 4)
66 slots = append(slots, int(pos))
68 bal.knownRendezvous = append(bal.knownRendezvous, slots)
71 bal.signatureTTL = 3600
74 func (bal *balancerSuite) SetUpTest(c *check.C) {
75 bal.srvs = make([]*KeepService, 16)
76 bal.KeepServices = make(map[string]*KeepService)
77 for i := range bal.srvs {
79 KeepService: arvados.KeepService{
80 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
83 srv.mounts = []*KeepMount{{
84 KeepMount: arvados.KeepMount{
85 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
90 bal.KeepServices[srv.UUID] = srv
93 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
97 func (bal *balancerSuite) TestPerfect(c *check.C) {
99 desired: map[string]int{"default": 2},
100 current: slots{0, 1},
105 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
107 desired: map[string]int{"default": 2},
108 current: slots{0, 2, 1},
109 shouldTrash: slots{2}})
112 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
114 desired: map[string]int{"default": 0},
115 current: slots{0, 1, 3},
116 shouldTrash: slots{0, 1, 3}})
119 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
121 desired: map[string]int{"default": 4},
122 current: slots{0, 1},
123 shouldPull: slots{2, 3}})
126 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
127 bal.srvList(0, slots{3})[0].ReadOnly = true
129 desired: map[string]int{"default": 4},
130 current: slots{0, 1},
131 shouldPull: slots{2, 4}})
134 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
135 bal.testMultipleViews(c, true)
138 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
139 bal.testMultipleViews(c, false)
142 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
143 for i, srv := range bal.srvs {
144 // Add a mount to each service
145 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
146 srv.mounts = append(srv.mounts, &KeepMount{
147 KeepMount: arvados.KeepMount{
148 DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
149 UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16),
156 for i := 1; i < len(bal.srvs); i++ {
159 // Timestamps are all different, but one of
160 // the mounts on srv[4] has the same device ID
161 // where the non-deletable replica is stored
162 // on srv[3], so only one replica is safe to
165 desired: map[string]int{"default": 1},
166 current: slots{0, i, i},
167 shouldTrash: slots{i}})
169 // Timestamps are all different, and the third
170 // replica can't be trashed because it's on a
171 // read-only mount, so the first two replicas
172 // should be trashed.
174 desired: map[string]int{"default": 1},
175 current: slots{0, i, i},
176 shouldTrash: slots{0, i}})
178 // Timestamps are all different, so both
179 // replicas on the non-optimal server should
182 desired: map[string]int{"default": 1},
183 current: slots{0, i, i},
184 shouldTrash: slots{i, i}})
186 // If the three replicas have identical timestamps,
187 // none of them can be trashed safely.
189 desired: map[string]int{"default": 1},
190 current: slots{0, i, i},
191 timestamps: []int64{12345678, 12345678, 12345678}})
192 // If the first and third replicas have identical
193 // timestamps, only the second replica should be
196 desired: map[string]int{"default": 1},
197 current: slots{0, i, i},
198 timestamps: []int64{12345678, 12345679, 12345678},
199 shouldTrash: slots{i}})
203 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
205 desired: map[string]int{"default": 2},
206 current: slots{2, 0},
207 shouldPull: slots{1}})
209 desired: map[string]int{"default": 2},
210 current: slots{2, 7},
211 shouldPull: slots{0, 1}})
212 // if only one of the pulls succeeds, we'll see this next:
214 desired: map[string]int{"default": 2},
215 current: slots{2, 1, 7},
216 shouldPull: slots{0},
217 shouldTrash: slots{7}})
218 // if both pulls succeed, we'll see this next:
220 desired: map[string]int{"default": 2},
221 current: slots{2, 0, 1, 7},
222 shouldTrash: slots{2, 7}})
224 // unbalanced + excessive replication => pull + trash
226 desired: map[string]int{"default": 2},
227 current: slots{2, 5, 7},
228 shouldPull: slots{0, 1},
229 shouldTrash: slots{7}})
232 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
233 for s, srv := range bal.srvs {
234 for i := 0; i < 3; i++ {
235 m := *(srv.mounts[0])
236 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
237 srv.mounts = append(srv.mounts, &m)
241 desired: map[string]int{"default": 2},
242 current: slots{0, 0},
243 shouldPull: slots{1}})
245 desired: map[string]int{"default": 2},
246 current: slots{2, 2},
247 shouldPull: slots{0, 1}})
249 desired: map[string]int{"default": 2},
250 current: slots{0, 0, 1},
251 shouldTrash: slots{0}})
253 desired: map[string]int{"default": 2},
254 current: slots{1, 1, 0},
255 shouldTrash: slots{1}})
257 desired: map[string]int{"default": 2},
258 current: slots{1, 0, 1, 0, 2},
259 shouldTrash: slots{0, 1, 2}})
261 desired: map[string]int{"default": 2},
262 current: slots{1, 1, 1, 0, 2},
263 shouldTrash: slots{1, 1, 2}})
265 desired: map[string]int{"default": 2},
266 current: slots{1, 1, 2},
267 shouldPull: slots{0},
268 shouldTrash: slots{1}})
270 desired: map[string]int{"default": 2},
271 current: slots{1, 1, 0},
272 timestamps: []int64{12345678, 12345678, 12345679},
275 desired: map[string]int{"default": 2},
276 current: slots{1, 1},
277 shouldPull: slots{0}})
280 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
281 // For purposes of increasing replication, we assume identical
282 // replicas are distinct.
284 desired: map[string]int{"default": 4},
285 current: slots{0, 1},
286 timestamps: []int64{12345678, 12345678},
287 shouldPull: slots{2, 3}})
290 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
291 // For purposes of decreasing replication, we assume identical
292 // replicas are NOT distinct.
294 desired: map[string]int{"default": 2},
295 current: slots{0, 1, 2},
296 timestamps: []int64{12345678, 12345678, 12345678}})
298 desired: map[string]int{"default": 2},
299 current: slots{0, 1, 2},
300 timestamps: []int64{12345678, 10000000, 10000000}})
303 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
304 oldTime := bal.MinMtime - 3600
305 newTime := bal.MinMtime + 3600
306 // The excess replica is too new to delete.
308 desired: map[string]int{"default": 2},
309 current: slots{0, 1, 2},
310 timestamps: []int64{oldTime, newTime, newTime + 1},
311 expectResult: balanceResult{
314 classState: map[string]balancedBlockState{"default": {
317 unachievable: false}}}})
318 // The best replicas are too new to delete, but the excess
319 // replica is old enough.
321 desired: map[string]int{"default": 2},
322 current: slots{0, 1, 2},
323 timestamps: []int64{newTime, newTime + 1, oldTime},
324 shouldTrash: slots{2}})
327 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
328 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
329 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
330 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
331 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
333 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
336 desired: map[string]int{"default": 2},
338 shouldPull: slots{2}})
341 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
342 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
343 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
345 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
348 desired: map[string]int{"default": 2},
350 shouldPull: slots{0}})
353 desired: map[string]int{"default": 2},
354 current: slots{0, 1},
358 desired: map[string]int{"default": 2},
359 current: slots{0, 1, 2},
360 shouldTrash: slots{2}})
363 desired: map[string]int{"default": 3},
364 current: slots{0, 2, 3, 4},
365 shouldPull: slots{1},
366 shouldTrash: slots{4},
367 expectResult: balanceResult{
370 classState: map[string]balancedBlockState{"default": {
373 unachievable: false}}}})
376 desired: map[string]int{"default": 3},
377 current: slots{0, 1, 2, 3, 4},
378 shouldTrash: slots{2, 3, 4}})
381 desired: map[string]int{"default": 4},
382 current: slots{0, 1, 2, 3, 4},
383 shouldTrash: slots{3, 4},
384 expectResult: balanceResult{
387 classState: map[string]balancedBlockState{"default": {
390 unachievable: false}}}})
391 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
394 desired: map[string]int{"default": 2},
396 expectResult: balanceResult{
399 classState: map[string]balancedBlockState{"default": {
402 unachievable: false}}}})
405 desired: map[string]int{"default": 3},
407 shouldPull: slots{1}})
410 desired: map[string]int{"default": 4},
412 shouldPull: slots{1, 2}})
415 desired: map[string]int{"default": 4},
417 shouldPull: slots{0, 1}})
420 desired: map[string]int{"default": 4},
422 shouldPull: slots{0, 1, 2},
423 expectResult: balanceResult{
426 classState: map[string]balancedBlockState{"default": {
429 unachievable: false}}}})
432 desired: map[string]int{"default": 2},
433 current: slots{1, 2, 3, 4},
434 shouldPull: slots{0},
435 shouldTrash: slots{3, 4}})
438 desired: map[string]int{"default": 2},
439 current: slots{0, 1, 2},
440 shouldTrash: slots{1, 2},
441 expectResult: balanceResult{
444 classState: map[string]balancedBlockState{"default": {
447 unachievable: false}}}})
450 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
451 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
452 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
453 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
454 // block 0 belongs on servers 3 and e, which have different
458 desired: map[string]int{"default": 2},
460 shouldPull: slots{0}})
461 // block 1 belongs on servers 0 and 9, which both report
462 // having a replica, but the replicas are on the same device
463 // ID -- so we should pull to the third position (7).
466 desired: map[string]int{"default": 2},
467 current: slots{0, 1},
468 shouldPull: slots{2}})
469 // block 1 can be pulled to the doubly-mounted device, but the
470 // pull should only be done on the first of the two servers.
473 desired: map[string]int{"default": 2},
475 shouldPull: slots{0}})
476 // block 0 has one replica on a single device mounted on two
477 // servers (e,9 at positions 1,9). Trashing the replica on 9
478 // would lose the block.
481 desired: map[string]int{"default": 2},
482 current: slots{1, 9},
483 shouldPull: slots{0},
484 expectResult: balanceResult{
486 classState: map[string]balancedBlockState{"default": {
489 unachievable: false}}}})
490 // block 0 is overreplicated, but the second and third
491 // replicas are the same replica according to DeviceID
492 // (despite different Mtimes). Don't trash the third replica.
495 desired: map[string]int{"default": 2},
496 current: slots{0, 1, 9},
497 expectResult: balanceResult{
499 classState: map[string]balancedBlockState{"default": {
502 unachievable: false}}}})
503 // block 0 is overreplicated; the third and fifth replicas are
504 // extra, but the fourth is another view of the second and
505 // shouldn't be trashed.
508 desired: map[string]int{"default": 2},
509 current: slots{0, 1, 5, 9, 12},
510 shouldTrash: slots{5, 12},
511 expectResult: balanceResult{
513 classState: map[string]balancedBlockState{"default": {
516 unachievable: false}}}})
519 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
520 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
521 // probe order. For these tests we give it two mounts, one
522 // with classes=[special], one with
523 // classes=[special,special2].
524 bal.srvs[9].mounts = []*KeepMount{{
525 KeepMount: arvados.KeepMount{
527 StorageClasses: []string{"special"},
528 UUID: "zzzzz-mount-special00000009",
529 DeviceID: "9-special",
531 KeepService: bal.srvs[9],
533 KeepMount: arvados.KeepMount{
535 StorageClasses: []string{"special", "special2"},
536 UUID: "zzzzz-mount-special20000009",
537 DeviceID: "9-special-and-special2",
539 KeepService: bal.srvs[9],
541 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
542 // probe order. We give it two mounts, one with
543 // classes=[special2], one with classes=[default].
544 bal.srvs[13].mounts = []*KeepMount{{
545 KeepMount: arvados.KeepMount{
547 StorageClasses: []string{"special2"},
548 UUID: "zzzzz-mount-special2000000d",
549 DeviceID: "13-special2",
551 KeepService: bal.srvs[13],
553 KeepMount: arvados.KeepMount{
555 StorageClasses: []string{"default"},
556 UUID: "zzzzz-mount-00000000000000d",
557 DeviceID: "13-default",
559 KeepService: bal.srvs[13],
561 // Pull to slot 9 because that's the only server with the
562 // desired class "special".
565 desired: map[string]int{"default": 2, "special": 1},
566 current: slots{0, 1},
567 shouldPull: slots{9},
568 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
569 // If some storage classes are not satisfied, don't trash any
570 // excess replicas. (E.g., if someone desires repl=1 on
571 // class=durable, and we have two copies on class=volatile, we
572 // should wait for pull to succeed before trashing anything).
575 desired: map[string]int{"special": 1},
576 current: slots{0, 1},
577 shouldPull: slots{9},
578 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
579 // Once storage classes are satisfied, trash excess replicas
580 // that appear earlier in probe order but aren't needed to
581 // satisfy the desired classes.
584 desired: map[string]int{"special": 1},
585 current: slots{0, 1, 9},
586 shouldTrash: slots{0, 1}})
587 // Pull to slot 5, the best server with class "special2".
590 desired: map[string]int{"special2": 1},
591 current: slots{0, 1},
592 shouldPull: slots{5},
593 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
594 // Pull to slot 5 and 9 to get replication 2 in desired class
598 desired: map[string]int{"special2": 2},
599 current: slots{0, 1},
600 shouldPull: slots{5, 9},
601 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
602 // Slot 0 has a replica in "default", slot 1 has a replica
603 // in "special"; we need another replica in "default", i.e.,
607 desired: map[string]int{"default": 2, "special": 1},
608 current: slots{0, 1},
609 shouldPull: slots{2}})
610 // Pull to best probe position 0 (despite wrong storage class)
611 // if it's impossible to achieve desired replication in the
612 // desired class (only slots 1 and 3 have special2).
615 desired: map[string]int{"special2": 3},
617 shouldPull: slots{0, 1}})
618 // Trash excess replica.
621 desired: map[string]int{"special": 1},
622 current: slots{0, 1},
623 shouldTrash: slots{1}})
624 // Leave one copy on slot 1 because slot 0 (server 9) only
628 desired: map[string]int{"special": 2},
629 current: slots{0, 1}})
632 // Clear all servers' changesets, balance a single block, and verify
633 // the appropriate changes for that block have been added to the
635 func (bal *balancerSuite) try(c *check.C, t tester) {
636 bal.setupLookupTables()
638 Replicas: bal.replList(t.known, t.current),
641 for i, t := range t.timestamps {
642 blk.Replicas[i].Mtime = t
644 for _, srv := range bal.srvs {
645 srv.ChangeSet = &ChangeSet{}
647 result := bal.balanceBlock(knownBlkid(t.known), blk)
649 var didPull, didTrash slots
650 var didPullMounts, didTrashMounts []string
651 for i, srv := range bal.srvs {
653 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
658 for _, pull := range srv.Pulls {
659 didPull = append(didPull, slot)
660 didPullMounts = append(didPullMounts, pull.To.UUID)
661 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
663 for _, trash := range srv.Trashes {
664 didTrash = append(didTrash, slot)
665 didTrashMounts = append(didTrashMounts, trash.From.UUID)
666 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
670 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
671 sort.Sort(sort.IntSlice(list))
673 c.Check(didPull, check.DeepEquals, t.shouldPull)
674 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
675 if t.shouldPullMounts != nil {
676 sort.Strings(didPullMounts)
677 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
679 if t.shouldTrashMounts != nil {
680 sort.Strings(didTrashMounts)
681 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
683 if t.expectResult.have > 0 {
684 c.Check(result.have, check.Equals, t.expectResult.have)
686 if t.expectResult.want > 0 {
687 c.Check(result.want, check.Equals, t.expectResult.want)
689 if t.expectResult.classState != nil {
690 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
694 // srvList returns the KeepServices found at the positions listed in
695 // order within the rendezvous (probe) order of the given known block.
696 // For example, srvList(3, slots{0, 1, 4}) returns the first-, second-,
697 // and fifth-best servers for storing knownBlkid(3).
698 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
699 for _, i := range order {
700 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
705 // replList is like srvList but returns an "existing replicas" slice,
706 // suitable for a BlockState test fixture.
707 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
708 nextMnt := map[*KeepService]int{}
709 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
710 for _, srv := range bal.srvList(knownBlockID, order) {
711 // round-robin repls onto each srv's mounts
713 nextMnt[srv] = (n + 1) % len(srv.mounts)
715 repls = append(repls, Replica{srv.mounts[n], mtime})
721 // generate the same data hashes that are tested in
722 // sdk/go/keepclient/root_sorter_test.go
723 func knownBlkid(i int) arvados.SizedDigest {
724 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))