1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
17 check "gopkg.in/check.v1"
// Test is the "go test" entry point for this package; only its signature
// is visible in this view.
// NOTE(review): presumably it hands off to gocheck so the registered
// suites run -- confirm against the full file.
21 func Test(t *testing.T) {
// Register balancerSuite with gocheck. The blank identifier is used
// because only the side effect of the call matters.
25 var _ = check.Suite(&balancerSuite{})
// balancerSuite holds the fixture state shared by the balancer tests in
// this file.
// NOTE(review): several lines of this declaration are not visible here.
// The fields from "desired" onward are read through tester values in
// try() (e.g. t.shouldPullMounts, t.expectResult), so they presumably
// belong to the tester struct rather than to balancerSuite -- confirm
// against the full file.
27 type balancerSuite struct {
// blks maps a name to a tester fixture (not referenced in the visible
// code).
30 blks map[string]tester
// knownRendezvous[b][p] is the index into srvs of the server at probe
// position p for known block b (this is how srvList uses it).
31 knownRendezvous [][]int
36 // index into knownRendezvous
// desired maps a storage class name to the wanted number of replicas.
44 desired map[string]int
// Expected mount UUIDs for pulls and trashes; try() compares them only
// when they are non-nil.
50 shouldPullMounts []string
51 shouldTrashMounts []string
// Expected balanceBlock result; try() checks "have" only when it is > 0
// and "classState" only when it is non-nil.
53 expectResult balanceResult
// SetUpSuite rebuilds bal.knownRendezvous from a list of strings (the
// list itself is not visible in this view) and sets the signature TTL
// used by the tests.
56 func (bal *balancerSuite) SetUpSuite(c *check.C) {
57 bal.knownRendezvous = nil
58 for _, str := range []string{
// Each byte of str is parsed as a single hex digit (base 16, 4 bits)
// and appended to that string's slots list.
// NOTE(review): the loop variable c shadows the *check.C parameter, and
// the ParseUint error is discarded, so a non-hex character would
// silently become 0.
65 for _, c := range []byte(str) {
66 pos, _ := strconv.ParseUint(string(c), 16, 4)
67 slots = append(slots, int(pos))
69 bal.knownRendezvous = append(bal.knownRendezvous, slots)
// signatureTTL is multiplied by 1e9 before being combined with UnixNano
// values elsewhere in this file, i.e. it is expressed in seconds.
72 bal.signatureTTL = 3600
// SetUpTest rebuilds the fixture: a slice of 16 KeepService entries, the
// KeepServices map keyed by service UUID, and MinMtime.
75 func (bal *balancerSuite) SetUpTest(c *check.C) {
76 bal.srvs = make([]*KeepService, 16)
77 bal.KeepServices = make(map[string]*KeepService)
78 for i := range bal.srvs {
// Service and mount UUIDs embed the server index as 15 zero-padded hex
// digits, so server i can be recognised from its UUID.
80 KeepService: arvados.KeepService{
81 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
// NOTE(review): the mount list appears to start with a single mount per
// server (TestDedupDevices checks len == 1) -- the rest of the literal
// is not visible here.
84 srv.mounts = []*KeepMount{{
85 KeepMount: arvados.KeepMount{
86 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
91 bal.KeepServices[srv.UUID] = srv
// MinMtime is a UnixNano timestamp signatureTTL seconds before now.
94 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
// TestPerfect: two replicas are desired in class "default" and replicas
// already exist at probe positions 0 and 1. The visible fixture lists no
// shouldPull or shouldTrash, so no changes are expected.
97 func (bal *balancerSuite) TestPerfect(c *check.C) {
99 desired: map[string]int{"default": 2},
100 current: slots{0, 1},
// TestDecreaseRepl: three replicas exist (probe positions 0, 1, 2) but
// only two are desired; the replica at position 2 should be trashed.
105 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
107 desired: map[string]int{"default": 2},
108 current: slots{0, 2, 1},
109 shouldTrash: slots{2}})
// TestDecreaseReplToZero: desired replication is 0, so every existing
// replica (probe positions 0, 1, 3) should be trashed.
112 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
114 desired: map[string]int{"default": 0},
115 current: slots{0, 1, 3},
116 shouldTrash: slots{0, 1, 3}})
// TestIncreaseRepl: four replicas are desired but only positions 0 and 1
// hold one; pulls are expected to the next two positions, 2 and 3.
119 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
121 desired: map[string]int{"default": 4},
122 current: slots{0, 1},
123 shouldPull: slots{2, 3}})
// TestSkipReadonly: same as TestIncreaseRepl except that the server at
// probe position 3 for known block 0 is read-only, so the second pull is
// expected at position 4 instead of 3.
126 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
// Mark the fourth-best server for known block 0 as read-only.
127 bal.srvList(0, slots{3})[0].ReadOnly = true
129 desired: map[string]int{"default": 4},
130 current: slots{0, 1},
131 shouldPull: slots{2, 4}})
// TestFixUnbalanced covers blocks whose replicas are not on the best
// probe positions: missing positions among the first two are pulled, and
// replicas on worse positions are trashed only in the cases that list a
// shouldTrash expectation.
134 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
// Position 1 is missing: pull there, nothing is trashed yet.
136 desired: map[string]int{"default": 2},
137 current: slots{2, 0},
138 shouldPull: slots{1}})
// Neither of the two best positions has a replica: pull to both.
140 desired: map[string]int{"default": 2},
141 current: slots{2, 7},
142 shouldPull: slots{0, 1}})
143 // if only one of the pulls succeeds, we'll see this next:
145 desired: map[string]int{"default": 2},
146 current: slots{2, 1, 7},
147 shouldPull: slots{0},
148 shouldTrash: slots{7}})
149 // if both pulls succeed, we'll see this next:
151 desired: map[string]int{"default": 2},
152 current: slots{2, 0, 1, 7},
153 shouldTrash: slots{2, 7}})
155 // unbalanced + excessive replication => pull + trash
157 desired: map[string]int{"default": 2},
158 current: slots{2, 5, 7},
159 shouldPull: slots{0, 1},
160 shouldTrash: slots{7}})
// TestMultipleReplicasPerService gives every server three extra mounts so
// that one server can hold several replicas of a block. A slot number
// repeated in "current" means several replicas on that server; replList
// assigns them round-robin to the server's mounts.
163 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
164 for _, srv := range bal.srvs {
165 for i := 0; i < 3; i++ {
// Copy the first mount by value so each appended pointer refers to a
// distinct KeepMount struct.
166 m := *(srv.mounts[0])
167 srv.mounts = append(srv.mounts, &m)
171 desired: map[string]int{"default": 2},
172 current: slots{0, 0},
173 shouldPull: slots{1}})
175 desired: map[string]int{"default": 2},
176 current: slots{2, 2},
177 shouldPull: slots{0, 1}})
179 desired: map[string]int{"default": 2},
180 current: slots{0, 0, 1},
181 shouldTrash: slots{0}})
183 desired: map[string]int{"default": 2},
184 current: slots{1, 1, 0},
185 shouldTrash: slots{1}})
187 desired: map[string]int{"default": 2},
188 current: slots{1, 0, 1, 0, 2},
189 shouldTrash: slots{0, 1, 2}})
191 desired: map[string]int{"default": 2},
192 current: slots{1, 1, 1, 0, 2},
193 shouldTrash: slots{1, 1, 2}})
195 desired: map[string]int{"default": 2},
196 current: slots{1, 1, 2},
197 shouldPull: slots{0},
198 shouldTrash: slots{1}})
// Same layout as the {1, 1, 0} case above, but the two replicas on
// server 1 share a timestamp; the visible fixture lists no trash here.
200 desired: map[string]int{"default": 2},
201 current: slots{1, 1, 0},
202 timestamps: []int64{12345678, 12345678, 12345679},
205 desired: map[string]int{"default": 2},
206 current: slots{1, 1},
207 shouldPull: slots{0}})
// TestIncreaseReplTimestampCollision: the two existing replicas carry
// identical timestamps, yet exactly two more pulls (positions 2 and 3)
// are expected to reach the desired replication of 4.
210 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
211 // For purposes of increasing replication, we assume identical
212 // replicas are distinct.
214 desired: map[string]int{"default": 4},
215 current: slots{0, 1},
216 timestamps: []int64{12345678, 12345678},
217 shouldPull: slots{2, 3}})
// TestDecreaseReplTimestampCollision: three replicas exist where two are
// desired, but some share a timestamp. Neither visible fixture lists a
// shouldTrash, so no trash is expected in either case.
220 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
221 // For purposes of decreasing replication, we assume identical
222 // replicas are NOT distinct.
224 desired: map[string]int{"default": 2},
225 current: slots{0, 1, 2},
226 timestamps: []int64{12345678, 12345678, 12345678}})
228 desired: map[string]int{"default": 2},
229 current: slots{0, 1, 2},
230 timestamps: []int64{12345678, 10000000, 10000000}})
// TestDecreaseReplBlockTooNew checks trash decisions for replicas whose
// timestamps fall on either side of bal.MinMtime.
233 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
// NOTE(review): MinMtime is derived from UnixNano in SetUpTest, so these
// +/-3600 offsets are nanoseconds, not seconds. That is still enough to
// land on either side of the threshold.
234 oldTime := bal.MinMtime - 3600
235 newTime := bal.MinMtime + 3600
236 // The excess replica is too new to delete.
238 desired: map[string]int{"default": 2},
239 current: slots{0, 1, 2},
240 timestamps: []int64{oldTime, newTime, newTime + 1}})
241 // The best replicas are too new to delete, but the excess
242 // replica is old enough.
244 desired: map[string]int{"default": 2},
245 current: slots{0, 1, 2},
246 timestamps: []int64{newTime, newTime + 1, oldTime},
247 shouldTrash: slots{2}})
// TestDedupDevices gives servers 3 and 14 mounts with the same DeviceID,
// the one on server 3 being read-only, and expects server 3 to go from
// one mount to none.
250 func (bal *balancerSuite) TestDedupDevices(c *check.C) {
251 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
252 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
253 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
254 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
// NOTE(review): a statement between these two checks is not visible in
// this view; presumably it performs the deduplication -- confirm.
256 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
259 desired: map[string]int{"default": 2},
261 shouldPull: slots{2}})
// TestDeviceRWMountedByMultipleServers makes servers 0, 9 and 14 report
// the same DeviceID and checks pull, trash and result expectations when
// one device is visible through several servers.
264 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
265 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
266 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
267 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
268 // block 0 belongs on servers 3 and e, which have different
272 desired: map[string]int{"default": 2},
274 shouldPull: slots{0}})
275 // block 1 belongs on servers 0 and 9, which both report
276 // having a replica, but the replicas are on the same device
277 // ID -- so we should pull to the third position (7).
280 desired: map[string]int{"default": 2},
281 current: slots{0, 1},
282 shouldPull: slots{2}})
283 // block 1 can be pulled to the doubly-mounted device, but the
284 // pull should only be done on the first of the two servers.
287 desired: map[string]int{"default": 2},
289 shouldPull: slots{0}})
290 // block 0 has one replica on a single device mounted on two
291 // servers (e,9 at positions 1,9). Trashing the replica on 9
292 // would lose the block.
295 desired: map[string]int{"default": 2},
296 current: slots{1, 9},
297 shouldPull: slots{0},
298 expectResult: balanceResult{
300 classState: map[string]balancedBlockState{"default": {
303 unachievable: false}}}})
304 // block 0 is overreplicated, but the second and third
305 // replicas are the same replica according to DeviceID
306 // (despite different Mtimes). Don't trash the third replica.
309 desired: map[string]int{"default": 2},
310 current: slots{0, 1, 9},
311 expectResult: balanceResult{
313 classState: map[string]balancedBlockState{"default": {
316 unachievable: false}}}})
317 // block 0 is overreplicated; the third and fifth replicas are
318 // extra, but the fourth is another view of the second and
319 // shouldn't be trashed.
322 desired: map[string]int{"default": 2},
323 current: slots{0, 1, 5, 9, 12},
324 shouldTrash: slots{5, 12},
325 expectResult: balanceResult{
327 classState: map[string]balancedBlockState{"default": {
330 unachievable: false}}}})
// TestChangeStorageClasses replaces the mounts of servers 9 and 13 with
// mounts in non-default storage classes, then checks which pulls and
// trashes are expected for various desired per-class replication levels.
333 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
334 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
335 // probe order. For these tests we give it two mounts, one
336 // with classes=[special], one with
337 // classes=[special,special2].
338 bal.srvs[9].mounts = []*KeepMount{{
339 KeepMount: arvados.KeepMount{
341 StorageClasses: []string{"special"},
342 UUID: "zzzzz-mount-special00000009",
343 DeviceID: "9-special",
345 KeepService: bal.srvs[9],
347 KeepMount: arvados.KeepMount{
349 StorageClasses: []string{"special", "special2"},
350 UUID: "zzzzz-mount-special20000009",
351 DeviceID: "9-special-and-special2",
353 KeepService: bal.srvs[9],
355 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
356 // probe order. We give it two mounts, one with
357 // classes=[special2], one with classes=[default].
358 bal.srvs[13].mounts = []*KeepMount{{
359 KeepMount: arvados.KeepMount{
361 StorageClasses: []string{"special2"},
362 UUID: "zzzzz-mount-special2000000d",
363 DeviceID: "13-special2",
365 KeepService: bal.srvs[13],
367 KeepMount: arvados.KeepMount{
369 StorageClasses: []string{"default"},
370 UUID: "zzzzz-mount-00000000000000d",
371 DeviceID: "13-default",
373 KeepService: bal.srvs[13],
375 // Pull to slot 9 because that's the only server with the
376 // desired class "special".
379 desired: map[string]int{"default": 2, "special": 1},
380 current: slots{0, 1},
381 shouldPull: slots{9},
382 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
383 // If some storage classes are not satisfied, don't trash any
384 // excess replicas. (E.g., if someone desires repl=1 on
385 // class=durable, and we have two copies on class=volatile, we
386 // should wait for pull to succeed before trashing anything).
389 desired: map[string]int{"special": 1},
390 current: slots{0, 1},
391 shouldPull: slots{9},
392 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
393 // Once storage classes are satisfied, trash excess replicas
394 // that appear earlier in probe order but aren't needed to
395 // satisfy the desired classes.
398 desired: map[string]int{"special": 1},
399 current: slots{0, 1, 9},
400 shouldTrash: slots{0, 1}})
401 // Pull to slot 5, the best server with class "special2".
404 desired: map[string]int{"special2": 1},
405 current: slots{0, 1},
406 shouldPull: slots{5},
407 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
408 // Pull to slot 5 and 9 to get replication 2 in desired class
412 desired: map[string]int{"special2": 2},
413 current: slots{0, 1},
414 shouldPull: slots{5, 9},
// The expected mount UUIDs are listed in sorted order because try()
// sorts only the observed list before comparing.
415 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
416 // Slot 0 has a replica in "default", slot 1 has a replica
417 // in "special"; we need another replica in "default", i.e.,
421 desired: map[string]int{"default": 2, "special": 1},
422 current: slots{0, 1},
423 shouldPull: slots{2}})
424 // Pull to best probe position 0 (despite wrong storage class)
425 // if it's impossible to achieve desired replication in the
426 // desired class (only slots 1 and 3 have special2).
429 desired: map[string]int{"special2": 3},
431 shouldPull: slots{0, 1}})
432 // Trash excess replica.
435 desired: map[string]int{"special": 1},
436 current: slots{0, 1},
437 shouldTrash: slots{1}})
438 // Leave one copy on slot 1 because slot 0 (server 9) only
442 desired: map[string]int{"special": 2},
443 current: slots{0, 1}})
446 // Clear all servers' changesets, balance a single block, and verify
447 // the appropriate changes for that block have been added to the
// (the end of the sentence above is not visible in this view).
//
// try builds a block state from t.known/t.current, optionally overrides
// replica Mtimes from t.timestamps, runs bal.balanceBlock, and compares
// the pulls and trashes recorded on each server against the fixture.
449 func (bal *balancerSuite) try(c *check.C, t tester) {
450 bal.setupLookupTables()
452 Replicas: bal.replList(t.known, t.current),
// NOTE(review): the loop variable t shadows the tester parameter for the
// duration of this loop.
455 for i, t := range t.timestamps {
456 blk.Replicas[i].Mtime = t
// Start every server with an empty ChangeSet so that only changes for
// this block are observed below.
458 for _, srv := range bal.srvs {
459 srv.ChangeSet = &ChangeSet{}
461 result := bal.balanceBlock(knownBlkid(t.known), blk)
463 var didPull, didTrash slots
464 var didPullMounts, didTrashMounts []string
465 for i, srv := range bal.srvs {
// NOTE(review): this loop presumably translates server index i into its
// probe-order slot for the block; its body is not visible here.
467 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
472 for _, pull := range srv.Pulls {
473 didPull = append(didPull, slot)
474 didPullMounts = append(didPullMounts, pull.To.UUID)
475 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
477 for _, trash := range srv.Trashes {
478 didTrash = append(didTrash, slot)
479 didTrashMounts = append(didTrashMounts, trash.From.UUID)
480 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
// Sort observed and expected slot lists so the comparison ignores order.
// This sorts the fixture's own shouldPull/shouldTrash slices in place.
484 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
485 sort.Sort(sort.IntSlice(list))
487 c.Check(didPull, check.DeepEquals, t.shouldPull)
488 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
// Mount UUIDs are checked only when the fixture supplies them. Only the
// observed list is sorted, so expected lists must already be sorted.
489 if t.shouldPullMounts != nil {
490 sort.Strings(didPullMounts)
491 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
493 if t.shouldTrashMounts != nil {
494 sort.Strings(didTrashMounts)
495 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
// The balanceBlock result is checked only for the parts the fixture
// sets: "have" when positive, "classState" when non-nil.
497 if t.expectResult.have > 0 {
498 c.Check(result.have, check.Equals, t.expectResult.have)
500 if t.expectResult.classState != nil {
501 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
505 // srvList returns the KeepServices, sorted in rendezvous order and
506 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
507 // returns the first-, second-, and fifth-best servers for storing
508 // knownBlkid(3).
509 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
// Each entry of order is a probe position; knownRendezvous maps it to an
// index into bal.srvs for this known block.
510 for _, i := range order {
511 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
516 // replList is like srvList but returns an "existing replicas" slice,
517 // suitable for a BlockState test fixture.
518 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
// nextMnt remembers, per server, which mount the next replica goes on.
519 nextMnt := map[*KeepService]int{}
// Every replica gets the same Mtime: one day (86400 s) older than the
// now - signatureTTL threshold that SetUpTest stores in MinMtime.
520 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
521 for _, srv := range bal.srvList(knownBlockID, order) {
522 // round-robin repls onto each srv's mounts
524 nextMnt[srv] = (n + 1) % len(srv.mounts)
526 repls = append(repls, Replica{srv.mounts[n], mtime})
532 // generate the same data hashes that are tested in
533 // sdk/go/keepclient/root_sorter_test.go
//
// knownBlkid formats i as a 64-digit zero-padded hex string, takes the
// MD5 of that string, and returns the hex digest followed by "+64".
534 func knownBlkid(i int) arvados.SizedDigest {
535 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))