1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
17 check "gopkg.in/check.v1"
21 func Test(t *testing.T) {
25 var _ = check.Suite(&balancerSuite{})
27 type balancerSuite struct {
30 blks map[string]tester
31 knownRendezvous [][]int
36 // index into knownRendezvous
44 desired map[string]int
50 shouldPullMounts []string
51 shouldTrashMounts []string
53 expectResult balanceResult
56 func (bal *balancerSuite) SetUpSuite(c *check.C) {
57 bal.knownRendezvous = nil
58 for _, str := range []string{
65 for _, c := range []byte(str) {
66 pos, _ := strconv.ParseUint(string(c), 16, 4)
67 slots = append(slots, int(pos))
69 bal.knownRendezvous = append(bal.knownRendezvous, slots)
72 bal.signatureTTL = 3600
75 func (bal *balancerSuite) SetUpTest(c *check.C) {
76 bal.srvs = make([]*KeepService, 16)
77 bal.KeepServices = make(map[string]*KeepService)
78 for i := range bal.srvs {
80 KeepService: arvados.KeepService{
81 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
84 srv.mounts = []*KeepMount{{
85 KeepMount: arvados.KeepMount{
86 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
91 bal.KeepServices[srv.UUID] = srv
94 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
98 func (bal *balancerSuite) TestPerfect(c *check.C) {
100 desired: map[string]int{"default": 2},
101 current: slots{0, 1},
106 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
108 desired: map[string]int{"default": 2},
109 current: slots{0, 2, 1},
110 shouldTrash: slots{2}})
113 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
115 desired: map[string]int{"default": 0},
116 current: slots{0, 1, 3},
117 shouldTrash: slots{0, 1, 3}})
120 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
122 desired: map[string]int{"default": 4},
123 current: slots{0, 1},
124 shouldPull: slots{2, 3}})
127 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
128 bal.srvList(0, slots{3})[0].ReadOnly = true
130 desired: map[string]int{"default": 4},
131 current: slots{0, 1},
132 shouldPull: slots{2, 4}})
135 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
137 desired: map[string]int{"default": 2},
138 current: slots{2, 0},
139 shouldPull: slots{1}})
141 desired: map[string]int{"default": 2},
142 current: slots{2, 7},
143 shouldPull: slots{0, 1}})
144 // if only one of the pulls succeeds, we'll see this next:
146 desired: map[string]int{"default": 2},
147 current: slots{2, 1, 7},
148 shouldPull: slots{0},
149 shouldTrash: slots{7}})
150 // if both pulls succeed, we'll see this next:
152 desired: map[string]int{"default": 2},
153 current: slots{2, 0, 1, 7},
154 shouldTrash: slots{2, 7}})
156 // unbalanced + excessive replication => pull + trash
158 desired: map[string]int{"default": 2},
159 current: slots{2, 5, 7},
160 shouldPull: slots{0, 1},
161 shouldTrash: slots{7}})
164 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
165 for s, srv := range bal.srvs {
166 for i := 0; i < 3; i++ {
167 m := *(srv.mounts[0])
168 m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i)
169 srv.mounts = append(srv.mounts, &m)
173 desired: map[string]int{"default": 2},
174 current: slots{0, 0},
175 shouldPull: slots{1}})
177 desired: map[string]int{"default": 2},
178 current: slots{2, 2},
179 shouldPull: slots{0, 1}})
181 desired: map[string]int{"default": 2},
182 current: slots{0, 0, 1},
183 shouldTrash: slots{0}})
185 desired: map[string]int{"default": 2},
186 current: slots{1, 1, 0},
187 shouldTrash: slots{1}})
189 desired: map[string]int{"default": 2},
190 current: slots{1, 0, 1, 0, 2},
191 shouldTrash: slots{0, 1, 2}})
193 desired: map[string]int{"default": 2},
194 current: slots{1, 1, 1, 0, 2},
195 shouldTrash: slots{1, 1, 2}})
197 desired: map[string]int{"default": 2},
198 current: slots{1, 1, 2},
199 shouldPull: slots{0},
200 shouldTrash: slots{1}})
202 desired: map[string]int{"default": 2},
203 current: slots{1, 1, 0},
204 timestamps: []int64{12345678, 12345678, 12345679},
207 desired: map[string]int{"default": 2},
208 current: slots{1, 1},
209 shouldPull: slots{0}})
212 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
213 // For purposes of increasing replication, we assume identical
214 // replicas are distinct.
216 desired: map[string]int{"default": 4},
217 current: slots{0, 1},
218 timestamps: []int64{12345678, 12345678},
219 shouldPull: slots{2, 3}})
222 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
223 // For purposes of decreasing replication, we assume identical
224 // replicas are NOT distinct.
226 desired: map[string]int{"default": 2},
227 current: slots{0, 1, 2},
228 timestamps: []int64{12345678, 12345678, 12345678}})
230 desired: map[string]int{"default": 2},
231 current: slots{0, 1, 2},
232 timestamps: []int64{12345678, 10000000, 10000000}})
235 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
236 oldTime := bal.MinMtime - 3600
237 newTime := bal.MinMtime + 3600
238 // The excess replica is too new to delete.
240 desired: map[string]int{"default": 2},
241 current: slots{0, 1, 2},
242 timestamps: []int64{oldTime, newTime, newTime + 1},
243 expectResult: balanceResult{
246 classState: map[string]balancedBlockState{"default": {
249 unachievable: false}}}})
250 // The best replicas are too new to delete, but the excess
251 // replica is old enough.
253 desired: map[string]int{"default": 2},
254 current: slots{0, 1, 2},
255 timestamps: []int64{newTime, newTime + 1, oldTime},
256 shouldTrash: slots{2}})
259 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
260 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
261 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
262 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
263 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
265 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
268 desired: map[string]int{"default": 2},
270 shouldPull: slots{2}})
273 func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
274 bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0
275 bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
277 // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
280 desired: map[string]int{"default": 2},
282 shouldPull: slots{0}})
285 desired: map[string]int{"default": 2},
286 current: slots{0, 1},
290 desired: map[string]int{"default": 2},
291 current: slots{0, 1, 2},
292 shouldTrash: slots{2}})
295 desired: map[string]int{"default": 3},
296 current: slots{0, 2, 3, 4},
297 shouldPull: slots{1},
298 shouldTrash: slots{4},
299 expectResult: balanceResult{
302 classState: map[string]balancedBlockState{"default": {
305 unachievable: false}}}})
308 desired: map[string]int{"default": 3},
309 current: slots{0, 1, 2, 3, 4},
310 shouldTrash: slots{2, 3, 4}})
313 desired: map[string]int{"default": 4},
314 current: slots{0, 1, 2, 3, 4},
315 shouldTrash: slots{3, 4},
316 expectResult: balanceResult{
319 classState: map[string]balancedBlockState{"default": {
322 unachievable: false}}}})
323 // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
326 desired: map[string]int{"default": 2},
328 expectResult: balanceResult{
331 classState: map[string]balancedBlockState{"default": {
334 unachievable: false}}}})
337 desired: map[string]int{"default": 3},
339 shouldPull: slots{1}})
342 desired: map[string]int{"default": 4},
344 shouldPull: slots{1, 2}})
347 desired: map[string]int{"default": 4},
349 shouldPull: slots{0, 1}})
352 desired: map[string]int{"default": 4},
354 shouldPull: slots{0, 1, 2},
355 expectResult: balanceResult{
358 classState: map[string]balancedBlockState{"default": {
361 unachievable: false}}}})
364 desired: map[string]int{"default": 2},
365 current: slots{1, 2, 3, 4},
366 shouldPull: slots{0},
367 shouldTrash: slots{3, 4}})
370 desired: map[string]int{"default": 2},
371 current: slots{0, 1, 2},
372 shouldTrash: slots{1, 2},
373 expectResult: balanceResult{
376 classState: map[string]balancedBlockState{"default": {
379 unachievable: false}}}})
382 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
383 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
384 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
385 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
386 // block 0 belongs on servers 3 and e, which have different
390 desired: map[string]int{"default": 2},
392 shouldPull: slots{0}})
393 // block 1 belongs on servers 0 and 9, which both report
394 // having a replica, but the replicas are on the same device
395 // ID -- so we should pull to the third position (7).
398 desired: map[string]int{"default": 2},
399 current: slots{0, 1},
400 shouldPull: slots{2}})
401 // block 1 can be pulled to the doubly-mounted device, but the
402 // pull should only be done on the first of the two servers.
405 desired: map[string]int{"default": 2},
407 shouldPull: slots{0}})
408 // block 0 has one replica on a single device mounted on two
409 // servers (e,9 at positions 1,9). Trashing the replica on 9
410 // would lose the block.
413 desired: map[string]int{"default": 2},
414 current: slots{1, 9},
415 shouldPull: slots{0},
416 expectResult: balanceResult{
418 classState: map[string]balancedBlockState{"default": {
421 unachievable: false}}}})
422 // block 0 is overreplicated, but the second and third
423 // replicas are the same replica according to DeviceID
424 // (despite different Mtimes). Don't trash the third replica.
427 desired: map[string]int{"default": 2},
428 current: slots{0, 1, 9},
429 expectResult: balanceResult{
431 classState: map[string]balancedBlockState{"default": {
434 unachievable: false}}}})
435 // block 0 is overreplicated; the third and fifth replicas are
436 // extra, but the fourth is another view of the second and
437 // shouldn't be trashed.
440 desired: map[string]int{"default": 2},
441 current: slots{0, 1, 5, 9, 12},
442 shouldTrash: slots{5, 12},
443 expectResult: balanceResult{
445 classState: map[string]balancedBlockState{"default": {
448 unachievable: false}}}})
451 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
452 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
453 // probe order. For these tests we give it two mounts, one
454 // with classes=[special], one with
455 // classes=[special,special2].
456 bal.srvs[9].mounts = []*KeepMount{{
457 KeepMount: arvados.KeepMount{
459 StorageClasses: []string{"special"},
460 UUID: "zzzzz-mount-special00000009",
461 DeviceID: "9-special",
463 KeepService: bal.srvs[9],
465 KeepMount: arvados.KeepMount{
467 StorageClasses: []string{"special", "special2"},
468 UUID: "zzzzz-mount-special20000009",
469 DeviceID: "9-special-and-special2",
471 KeepService: bal.srvs[9],
473 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
474 // probe order. We give it two mounts, one with
475 // classes=[special2], one with classes=[default].
476 bal.srvs[13].mounts = []*KeepMount{{
477 KeepMount: arvados.KeepMount{
479 StorageClasses: []string{"special2"},
480 UUID: "zzzzz-mount-special2000000d",
481 DeviceID: "13-special2",
483 KeepService: bal.srvs[13],
485 KeepMount: arvados.KeepMount{
487 StorageClasses: []string{"default"},
488 UUID: "zzzzz-mount-00000000000000d",
489 DeviceID: "13-default",
491 KeepService: bal.srvs[13],
493 // Pull to slot 9 because that's the only server with the
494 // desired class "special".
497 desired: map[string]int{"default": 2, "special": 1},
498 current: slots{0, 1},
499 shouldPull: slots{9},
500 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
501 // If some storage classes are not satisfied, don't trash any
502 // excess replicas. (E.g., if someone desires repl=1 on
503 // class=durable, and we have two copies on class=volatile, we
504 // should wait for pull to succeed before trashing anything).
507 desired: map[string]int{"special": 1},
508 current: slots{0, 1},
509 shouldPull: slots{9},
510 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
511 // Once storage classes are satisfied, trash excess replicas
512 // that appear earlier in probe order but aren't needed to
513 // satisfy the desired classes.
516 desired: map[string]int{"special": 1},
517 current: slots{0, 1, 9},
518 shouldTrash: slots{0, 1}})
519 // Pull to slot 5, the best server with class "special2".
522 desired: map[string]int{"special2": 1},
523 current: slots{0, 1},
524 shouldPull: slots{5},
525 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
526 // Pull to slot 5 and 9 to get replication 2 in desired class
530 desired: map[string]int{"special2": 2},
531 current: slots{0, 1},
532 shouldPull: slots{5, 9},
533 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
534 // Slot 0 has a replica in "default", slot 1 has a replica
535 // in "special"; we need another replica in "default", i.e.,
539 desired: map[string]int{"default": 2, "special": 1},
540 current: slots{0, 1},
541 shouldPull: slots{2}})
542 // Pull to best probe position 0 (despite wrong storage class)
543 // if it's impossible to achieve desired replication in the
544 // desired class (only slots 1 and 3 have special2).
547 desired: map[string]int{"special2": 3},
549 shouldPull: slots{0, 1}})
550 // Trash excess replica.
553 desired: map[string]int{"special": 1},
554 current: slots{0, 1},
555 shouldTrash: slots{1}})
556 // Leave one copy on slot 1 because slot 0 (server 9) only
560 desired: map[string]int{"special": 2},
561 current: slots{0, 1}})
564 // Clear all servers' changesets, balance a single block, and verify
565 // the appropriate changes for that block have been added to the
567 func (bal *balancerSuite) try(c *check.C, t tester) {
568 bal.setupLookupTables()
570 Replicas: bal.replList(t.known, t.current),
573 for i, t := range t.timestamps {
574 blk.Replicas[i].Mtime = t
576 for _, srv := range bal.srvs {
577 srv.ChangeSet = &ChangeSet{}
579 result := bal.balanceBlock(knownBlkid(t.known), blk)
581 var didPull, didTrash slots
582 var didPullMounts, didTrashMounts []string
583 for i, srv := range bal.srvs {
585 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
590 for _, pull := range srv.Pulls {
591 didPull = append(didPull, slot)
592 didPullMounts = append(didPullMounts, pull.To.UUID)
593 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
595 for _, trash := range srv.Trashes {
596 didTrash = append(didTrash, slot)
597 didTrashMounts = append(didTrashMounts, trash.From.UUID)
598 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
602 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
603 sort.Sort(sort.IntSlice(list))
605 c.Check(didPull, check.DeepEquals, t.shouldPull)
606 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
607 if t.shouldPullMounts != nil {
608 sort.Strings(didPullMounts)
609 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
611 if t.shouldTrashMounts != nil {
612 sort.Strings(didTrashMounts)
613 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
615 if t.expectResult.have > 0 {
616 c.Check(result.have, check.Equals, t.expectResult.have)
618 if t.expectResult.want > 0 {
619 c.Check(result.want, check.Equals, t.expectResult.want)
621 if t.expectResult.classState != nil {
622 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
626 // srvList returns the KeepServices, sorted in rendezvous order and
627 // then selected by order. For example, srvList(3, slots{0, 1, 4})
628 // returns the first-, second-, and fifth-best servers for storing
629 // knownBlkid(3).
630 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
631 for _, i := range order {
632 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
637 // replList is like srvList but returns an "existing replicas" slice,
638 // suitable for a BlockState test fixture.
639 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
640 nextMnt := map[*KeepService]int{}
641 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
642 for _, srv := range bal.srvList(knownBlockID, order) {
643 // round-robin repls onto each srv's mounts
645 nextMnt[srv] = (n + 1) % len(srv.mounts)
647 repls = append(repls, Replica{srv.mounts[n], mtime})
653 // knownBlkid generates the same data hashes that are tested in
654 // sdk/go/keepclient/root_sorter_test.go.
655 func knownBlkid(i int) arvados.SizedDigest {
656 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))