1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
16 "git.curoverse.com/arvados.git/sdk/go/ctxlog"
17 check "gopkg.in/check.v1"
21 func Test(t *testing.T) {
25 var _ = check.Suite(&balancerSuite{})
27 type balancerSuite struct {
30 blks map[string]tester
31 knownRendezvous [][]int
36 // index into knownRendezvous
44 desired map[string]int
50 shouldPullMounts []string
51 shouldTrashMounts []string
53 expectResult balanceResult
56 func (bal *balancerSuite) SetUpSuite(c *check.C) {
57 bal.knownRendezvous = nil
58 for _, str := range []string{
65 for _, c := range []byte(str) {
66 pos, _ := strconv.ParseUint(string(c), 16, 4)
67 slots = append(slots, int(pos))
69 bal.knownRendezvous = append(bal.knownRendezvous, slots)
72 bal.signatureTTL = 3600
73 bal.Logger = ctxlog.TestLogger(c)
76 func (bal *balancerSuite) SetUpTest(c *check.C) {
77 bal.srvs = make([]*KeepService, 16)
78 bal.KeepServices = make(map[string]*KeepService)
79 for i := range bal.srvs {
81 KeepService: arvados.KeepService{
82 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
85 srv.mounts = []*KeepMount{{
86 KeepMount: arvados.KeepMount{
87 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
92 bal.KeepServices[srv.UUID] = srv
95 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
99 func (bal *balancerSuite) TestPerfect(c *check.C) {
101 desired: map[string]int{"default": 2},
102 current: slots{0, 1},
107 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
109 desired: map[string]int{"default": 2},
110 current: slots{0, 2, 1},
111 shouldTrash: slots{2}})
114 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
116 desired: map[string]int{"default": 0},
117 current: slots{0, 1, 3},
118 shouldTrash: slots{0, 1, 3}})
121 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
123 desired: map[string]int{"default": 4},
124 current: slots{0, 1},
125 shouldPull: slots{2, 3}})
128 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
129 bal.srvList(0, slots{3})[0].ReadOnly = true
131 desired: map[string]int{"default": 4},
132 current: slots{0, 1},
133 shouldPull: slots{2, 4}})
136 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
137 bal.testMultipleViews(c, true)
140 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
141 bal.testMultipleViews(c, false)
144 func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
145 for i, srv := range bal.srvs {
146 // Add a mount to each service
147 srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
148 srv.mounts = append(srv.mounts, &KeepMount{
149 KeepMount: arvados.KeepMount{
150 DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
151 UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16),
158 for i := 1; i < len(bal.srvs); i++ {
161 // Timestamps are all different, but one of
162 // the mounts on srv[4] has the same device ID
163 // where the non-deletable replica is stored
164 // on srv[3], so only one replica is safe to
167 desired: map[string]int{"default": 1},
168 current: slots{0, i, i},
169 shouldTrash: slots{i}})
171 // Timestamps are all different, and the third
172 // replica can't be trashed because it's on a
173 // read-only mount, so the first two replicas
174 // should be trashed.
176 desired: map[string]int{"default": 1},
177 current: slots{0, i, i},
178 shouldTrash: slots{0, i}})
180 // Timestamps are all different, so both
181 // replicas on the non-optimal server should
184 desired: map[string]int{"default": 1},
185 current: slots{0, i, i},
186 shouldTrash: slots{i, i}})
188 // If the three replicas have identical timestamps,
189 // none of them can be trashed safely.
191 desired: map[string]int{"default": 1},
192 current: slots{0, i, i},
193 timestamps: []int64{12345678, 12345678, 12345678}})
194 // If the first and third replicas have identical
195 // timestamps, only the second replica should be
198 desired: map[string]int{"default": 1},
199 current: slots{0, i, i},
200 timestamps: []int64{12345678, 12345679, 12345678},
201 shouldTrash: slots{i}})
205 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
207 desired: map[string]int{"default": 2},
208 current: slots{2, 0},
209 shouldPull: slots{1}})
211 desired: map[string]int{"default": 2},
212 current: slots{2, 7},
213 shouldPull: slots{0, 1}})
214 // if only one of the pulls succeeds, we'll see this next:
216 desired: map[string]int{"default": 2},
217 current: slots{2, 1, 7},
218 shouldPull: slots{0},
219 shouldTrash: slots{7}})
220 // if both pulls succeed, we'll see this next:
222 desired: map[string]int{"default": 2},
223 current: slots{2, 0, 1, 7},
224 shouldTrash: slots{2, 7}})
226 // unbalanced + excessive replication => pull + trash
228 desired: map[string]int{"default": 2},
229 current: slots{2, 5, 7},
230 shouldPull: slots{0, 1},
231 shouldTrash: slots{7}})
234 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
235 for s, srv := range bal.srvs {
236 for i := 0; i < 3; i++ {
237 m := *(srv.mounts[0])
238 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
239 srv.mounts = append(srv.mounts, &m)
243 desired: map[string]int{"default": 2},
244 current: slots{0, 0},
245 shouldPull: slots{1}})
247 desired: map[string]int{"default": 2},
248 current: slots{2, 2},
249 shouldPull: slots{0, 1}})
251 desired: map[string]int{"default": 2},
252 current: slots{0, 0, 1},
253 shouldTrash: slots{0}})
255 desired: map[string]int{"default": 2},
256 current: slots{1, 1, 0},
257 shouldTrash: slots{1}})
259 desired: map[string]int{"default": 2},
260 current: slots{1, 0, 1, 0, 2},
261 shouldTrash: slots{0, 1, 2}})
263 desired: map[string]int{"default": 2},
264 current: slots{1, 1, 1, 0, 2},
265 shouldTrash: slots{1, 1, 2}})
267 desired: map[string]int{"default": 2},
268 current: slots{1, 1, 2},
269 shouldPull: slots{0},
270 shouldTrash: slots{1}})
272 desired: map[string]int{"default": 2},
273 current: slots{1, 1, 0},
274 timestamps: []int64{12345678, 12345678, 12345679},
277 desired: map[string]int{"default": 2},
278 current: slots{1, 1},
279 shouldPull: slots{0}})
282 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
283 // For purposes of increasing replication, we assume identical
284 // replicas are distinct.
286 desired: map[string]int{"default": 4},
287 current: slots{0, 1},
288 timestamps: []int64{12345678, 12345678},
289 shouldPull: slots{2, 3}})
292 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
293 // For purposes of decreasing replication, we assume identical
294 // replicas are NOT distinct.
296 desired: map[string]int{"default": 2},
297 current: slots{0, 1, 2},
298 timestamps: []int64{12345678, 12345678, 12345678}})
300 desired: map[string]int{"default": 2},
301 current: slots{0, 1, 2},
302 timestamps: []int64{12345678, 10000000, 10000000}})
305 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
306 oldTime := bal.MinMtime - 3600
307 newTime := bal.MinMtime + 3600
308 // The excess replica is too new to delete.
310 desired: map[string]int{"default": 2},
311 current: slots{0, 1, 2},
312 timestamps: []int64{oldTime, newTime, newTime + 1},
313 expectResult: balanceResult{
316 classState: map[string]balancedBlockState{"default": {
319 unachievable: false}}}})
320 // The best replicas are too new to delete, but the excess
321 // replica is old enough.
323 desired: map[string]int{"default": 2},
324 current: slots{0, 1, 2},
325 timestamps: []int64{newTime, newTime + 1, oldTime},
326 shouldTrash: slots{2}})
329 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
330 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
331 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
332 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
333 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
335 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
338 desired: map[string]int{"default": 2},
340 shouldPull: slots{2}})
343 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
344 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
345 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
347 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
350 desired: map[string]int{"default": 2},
352 shouldPull: slots{0}})
355 desired: map[string]int{"default": 2},
356 current: slots{0, 1},
360 desired: map[string]int{"default": 2},
361 current: slots{0, 1, 2},
362 shouldTrash: slots{2}})
365 desired: map[string]int{"default": 3},
366 current: slots{0, 2, 3, 4},
367 shouldPull: slots{1},
368 shouldTrash: slots{4},
369 expectResult: balanceResult{
372 classState: map[string]balancedBlockState{"default": {
375 unachievable: false}}}})
378 desired: map[string]int{"default": 3},
379 current: slots{0, 1, 2, 3, 4},
380 shouldTrash: slots{2, 3, 4}})
383 desired: map[string]int{"default": 4},
384 current: slots{0, 1, 2, 3, 4},
385 shouldTrash: slots{3, 4},
386 expectResult: balanceResult{
389 classState: map[string]balancedBlockState{"default": {
392 unachievable: false}}}})
393 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
396 desired: map[string]int{"default": 2},
398 expectResult: balanceResult{
401 classState: map[string]balancedBlockState{"default": {
404 unachievable: false}}}})
407 desired: map[string]int{"default": 3},
409 shouldPull: slots{1}})
412 desired: map[string]int{"default": 4},
414 shouldPull: slots{1, 2}})
417 desired: map[string]int{"default": 4},
419 shouldPull: slots{0, 1}})
422 desired: map[string]int{"default": 4},
424 shouldPull: slots{0, 1, 2},
425 expectResult: balanceResult{
428 classState: map[string]balancedBlockState{"default": {
431 unachievable: false}}}})
434 desired: map[string]int{"default": 2},
435 current: slots{1, 2, 3, 4},
436 shouldPull: slots{0},
437 shouldTrash: slots{3, 4}})
440 desired: map[string]int{"default": 2},
441 current: slots{0, 1, 2},
442 shouldTrash: slots{1, 2},
443 expectResult: balanceResult{
446 classState: map[string]balancedBlockState{"default": {
449 unachievable: false}}}})
452 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
453 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
454 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
455 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
456 // block 0 belongs on servers 3 and e, which have different
460 desired: map[string]int{"default": 2},
462 shouldPull: slots{0}})
463 // block 1 belongs on servers 0 and 9, which both report
464 // having a replica, but the replicas are on the same device
465 // ID -- so we should pull to the third position (7).
468 desired: map[string]int{"default": 2},
469 current: slots{0, 1},
470 shouldPull: slots{2}})
471 // block 1 can be pulled to the doubly-mounted device, but the
472 // pull should only be done on the first of the two servers.
475 desired: map[string]int{"default": 2},
477 shouldPull: slots{0}})
478 // block 0 has one replica on a single device mounted on two
479 // servers (e,9 at positions 1,9). Trashing the replica on 9
480 // would lose the block.
483 desired: map[string]int{"default": 2},
484 current: slots{1, 9},
485 shouldPull: slots{0},
486 expectResult: balanceResult{
488 classState: map[string]balancedBlockState{"default": {
491 unachievable: false}}}})
492 // block 0 is overreplicated, but the second and third
493 // replicas are the same replica according to DeviceID
494 // (despite different Mtimes). Don't trash the third replica.
497 desired: map[string]int{"default": 2},
498 current: slots{0, 1, 9},
499 expectResult: balanceResult{
501 classState: map[string]balancedBlockState{"default": {
504 unachievable: false}}}})
505 // block 0 is overreplicated; the third and fifth replicas are
506 // extra, but the fourth is another view of the second and
507 // shouldn't be trashed.
510 desired: map[string]int{"default": 2},
511 current: slots{0, 1, 5, 9, 12},
512 shouldTrash: slots{5, 12},
513 expectResult: balanceResult{
515 classState: map[string]balancedBlockState{"default": {
518 unachievable: false}}}})
521 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
522 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
523 // probe order. For these tests we give it two mounts, one
524 // with classes=[special], one with
525 // classes=[special,special2].
526 bal.srvs[9].mounts = []*KeepMount{{
527 KeepMount: arvados.KeepMount{
529 StorageClasses: map[string]bool{"special": true},
530 UUID: "zzzzz-mount-special00000009",
531 DeviceID: "9-special",
533 KeepService: bal.srvs[9],
535 KeepMount: arvados.KeepMount{
537 StorageClasses: map[string]bool{"special": true, "special2": true},
538 UUID: "zzzzz-mount-special20000009",
539 DeviceID: "9-special-and-special2",
541 KeepService: bal.srvs[9],
543 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
544 // probe order. We give it two mounts, one with
545 // classes=[special2], one with classes=[default].
546 bal.srvs[13].mounts = []*KeepMount{{
547 KeepMount: arvados.KeepMount{
549 StorageClasses: map[string]bool{"special2": true},
550 UUID: "zzzzz-mount-special2000000d",
551 DeviceID: "13-special2",
553 KeepService: bal.srvs[13],
555 KeepMount: arvados.KeepMount{
557 StorageClasses: map[string]bool{"default": true},
558 UUID: "zzzzz-mount-00000000000000d",
559 DeviceID: "13-default",
561 KeepService: bal.srvs[13],
563 // Pull to slot 9 because that's the only server with the
564 // desired class "special".
567 desired: map[string]int{"default": 2, "special": 1},
568 current: slots{0, 1},
569 shouldPull: slots{9},
570 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
571 // If some storage classes are not satisfied, don't trash any
572 // excess replicas. (E.g., if someone desires repl=1 on
573 // class=durable, and we have two copies on class=volatile, we
574 // should wait for pull to succeed before trashing anything).
577 desired: map[string]int{"special": 1},
578 current: slots{0, 1},
579 shouldPull: slots{9},
580 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
581 // Once storage classes are satisfied, trash excess replicas
582 // that appear earlier in probe order but aren't needed to
583 // satisfy the desired classes.
586 desired: map[string]int{"special": 1},
587 current: slots{0, 1, 9},
588 shouldTrash: slots{0, 1}})
589 // Pull to slot 5, the best server with class "special2".
592 desired: map[string]int{"special2": 1},
593 current: slots{0, 1},
594 shouldPull: slots{5},
595 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
596 // Pull to slot 5 and 9 to get replication 2 in desired class
600 desired: map[string]int{"special2": 2},
601 current: slots{0, 1},
602 shouldPull: slots{5, 9},
603 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
604 // Slot 0 has a replica in "default", slot 1 has a replica
605 // in "special"; we need another replica in "default", i.e.,
609 desired: map[string]int{"default": 2, "special": 1},
610 current: slots{0, 1},
611 shouldPull: slots{2}})
612 // Pull to best probe position 0 (despite wrong storage class)
613 // if it's impossible to achieve desired replication in the
614 // desired class (only slots 1 and 3 have special2).
617 desired: map[string]int{"special2": 3},
619 shouldPull: slots{0, 1}})
620 // Trash excess replica.
623 desired: map[string]int{"special": 1},
624 current: slots{0, 1},
625 shouldTrash: slots{1}})
626 // Leave one copy on slot 1 because slot 0 (server 9) only
630 desired: map[string]int{"special": 2},
631 current: slots{0, 1}})
634 // Clear all servers' changesets, balance a single block, and verify
635 // the appropriate changes for that block have been added to the
637 func (bal *balancerSuite) try(c *check.C, t tester) {
638 bal.setupLookupTables()
640 Replicas: bal.replList(t.known, t.current),
643 for i, t := range t.timestamps {
644 blk.Replicas[i].Mtime = t
646 for _, srv := range bal.srvs {
647 srv.ChangeSet = &ChangeSet{}
649 result := bal.balanceBlock(knownBlkid(t.known), blk)
651 var didPull, didTrash slots
652 var didPullMounts, didTrashMounts []string
653 for i, srv := range bal.srvs {
655 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
660 for _, pull := range srv.Pulls {
661 didPull = append(didPull, slot)
662 didPullMounts = append(didPullMounts, pull.To.UUID)
663 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
665 for _, trash := range srv.Trashes {
666 didTrash = append(didTrash, slot)
667 didTrashMounts = append(didTrashMounts, trash.From.UUID)
668 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
672 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
673 sort.Sort(sort.IntSlice(list))
675 c.Check(didPull, check.DeepEquals, t.shouldPull)
676 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
677 if t.shouldPullMounts != nil {
678 sort.Strings(didPullMounts)
679 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
681 if t.shouldTrashMounts != nil {
682 sort.Strings(didTrashMounts)
683 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
685 if t.expectResult.have > 0 {
686 c.Check(result.have, check.Equals, t.expectResult.have)
688 if t.expectResult.want > 0 {
689 c.Check(result.want, check.Equals, t.expectResult.want)
691 if t.expectResult.classState != nil {
692 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
696 // srvList returns the KeepServices, sorted in rendezvous order and
697 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
698 // returns the first-, second-, and fifth-best servers for storing
699 // knownBlkid(3).
700 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
701 for _, i := range order {
702 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
707 // replList is like srvList but returns an "existing replicas" slice,
708 // suitable for a BlockState test fixture.
709 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
710 nextMnt := map[*KeepService]int{}
711 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
712 for _, srv := range bal.srvList(knownBlockID, order) {
713 // round-robin repls onto each srv's mounts
715 nextMnt[srv] = (n + 1) % len(srv.mounts)
717 repls = append(repls, Replica{srv.mounts[n], mtime})
723 // generate the same data hashes that are tested in
724 // sdk/go/keepclient/root_sorter_test.go
725 func knownBlkid(i int) arvados.SizedDigest {
726 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))