1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
17 check "gopkg.in/check.v1"
21 func Test(t *testing.T) {
25 var _ = check.Suite(&balancerSuite{})
27 type balancerSuite struct {
30 blks map[string]tester
31 knownRendezvous [][]int
36 // index into knownRendezvous
44 desired map[string]int
50 shouldPullMounts []string
51 shouldTrashMounts []string
54 func (bal *balancerSuite) SetUpSuite(c *check.C) {
55 bal.knownRendezvous = nil
56 for _, str := range []string{
63 for _, c := range []byte(str) {
64 pos, _ := strconv.ParseUint(string(c), 16, 4)
65 slots = append(slots, int(pos))
67 bal.knownRendezvous = append(bal.knownRendezvous, slots)
70 bal.signatureTTL = 3600
73 func (bal *balancerSuite) SetUpTest(c *check.C) {
74 bal.srvs = make([]*KeepService, 16)
75 bal.KeepServices = make(map[string]*KeepService)
76 for i := range bal.srvs {
78 KeepService: arvados.KeepService{
79 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
82 srv.mounts = []*KeepMount{{
83 KeepMount: arvados.KeepMount{
84 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
89 bal.KeepServices[srv.UUID] = srv
92 bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
95 func (bal *balancerSuite) TestPerfect(c *check.C) {
97 desired: map[string]int{"default": 2},
103 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
105 desired: map[string]int{"default": 2},
106 current: slots{0, 2, 1},
107 shouldTrash: slots{2}})
110 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
112 desired: map[string]int{"default": 0},
113 current: slots{0, 1, 3},
114 shouldTrash: slots{0, 1, 3}})
117 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
119 desired: map[string]int{"default": 4},
120 current: slots{0, 1},
121 shouldPull: slots{2, 3}})
124 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
125 bal.srvList(0, slots{3})[0].ReadOnly = true
127 desired: map[string]int{"default": 4},
128 current: slots{0, 1},
129 shouldPull: slots{2, 4}})
132 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
134 desired: map[string]int{"default": 2},
135 current: slots{2, 0},
136 shouldPull: slots{1}})
138 desired: map[string]int{"default": 2},
139 current: slots{2, 7},
140 shouldPull: slots{0, 1}})
141 // if only one of the pulls succeeds, we'll see this next:
143 desired: map[string]int{"default": 2},
144 current: slots{2, 1, 7},
145 shouldPull: slots{0},
146 shouldTrash: slots{7}})
147 // if both pulls succeed, we'll see this next:
149 desired: map[string]int{"default": 2},
150 current: slots{2, 0, 1, 7},
151 shouldTrash: slots{2, 7}})
153 // unbalanced + excessive replication => pull + trash
155 desired: map[string]int{"default": 2},
156 current: slots{2, 5, 7},
157 shouldPull: slots{0, 1},
158 shouldTrash: slots{7}})
161 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
162 for _, srv := range bal.srvs {
163 for i := 0; i < 3; i++ {
164 m := *(srv.mounts[0])
165 srv.mounts = append(srv.mounts, &m)
169 desired: map[string]int{"default": 2},
170 current: slots{0, 0},
171 shouldPull: slots{1}})
173 desired: map[string]int{"default": 2},
174 current: slots{2, 2},
175 shouldPull: slots{0, 1}})
177 desired: map[string]int{"default": 2},
178 current: slots{0, 0, 1},
179 shouldTrash: slots{0}})
181 desired: map[string]int{"default": 2},
182 current: slots{1, 1, 0},
183 shouldTrash: slots{1}})
185 desired: map[string]int{"default": 2},
186 current: slots{1, 0, 1, 0, 2},
187 shouldTrash: slots{0, 1, 2}})
189 desired: map[string]int{"default": 2},
190 current: slots{1, 1, 1, 0, 2},
191 shouldTrash: slots{1, 1, 2}})
193 desired: map[string]int{"default": 2},
194 current: slots{1, 1, 2},
195 shouldPull: slots{0},
196 shouldTrash: slots{1}})
198 desired: map[string]int{"default": 2},
199 current: slots{1, 1, 0},
200 timestamps: []int64{12345678, 12345678, 12345679},
203 desired: map[string]int{"default": 2},
204 current: slots{1, 1},
205 shouldPull: slots{0}})
208 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
209 // For purposes of increasing replication, we assume identical
210 // replicas are distinct.
212 desired: map[string]int{"default": 4},
213 current: slots{0, 1},
214 timestamps: []int64{12345678, 12345678},
215 shouldPull: slots{2, 3}})
218 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
219 // For purposes of decreasing replication, we assume identical
220 // replicas are NOT distinct.
222 desired: map[string]int{"default": 2},
223 current: slots{0, 1, 2},
224 timestamps: []int64{12345678, 12345678, 12345678}})
226 desired: map[string]int{"default": 2},
227 current: slots{0, 1, 2},
228 timestamps: []int64{12345678, 10000000, 10000000}})
231 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
232 oldTime := bal.MinMtime - 3600
233 newTime := bal.MinMtime + 3600
234 // The excess replica is too new to delete.
236 desired: map[string]int{"default": 2},
237 current: slots{0, 1, 2},
238 timestamps: []int64{oldTime, newTime, newTime + 1}})
239 // The best replicas are too new to delete, but the excess
240 // replica is old enough.
242 desired: map[string]int{"default": 2},
243 current: slots{0, 1, 2},
244 timestamps: []int64{newTime, newTime + 1, oldTime},
245 shouldTrash: slots{2}})
248 func (bal *balancerSuite) TestDedupDevices(c *check.C) {
249 bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
250 bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
251 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
252 c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
254 c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
257 desired: map[string]int{"default": 2},
259 shouldPull: slots{2}})
262 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
263 bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
264 bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
265 bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
266 // block 0 belongs on servers 3 and e, which have different
270 desired: map[string]int{"default": 2},
272 shouldPull: slots{0}})
273 // block 1 belongs on servers 0 and 9, which both report
274 // having a replica, but the replicas are on the same device
275 // ID -- so we should pull to the third position (7).
278 desired: map[string]int{"default": 2},
279 current: slots{0, 1},
280 shouldPull: slots{2}})
281 // block 1 can be pulled to the doubly-mounted device, but the
282 // pull should only be done on the first of the two servers.
285 desired: map[string]int{"default": 2},
287 shouldPull: slots{0}})
288 // block 0 has one replica on a single device mounted on two
289 // servers (e,9 at positions 1,9). Trashing the replica on 9
290 // would lose the block.
293 desired: map[string]int{"default": 2},
294 current: slots{1, 9},
295 shouldPull: slots{0}})
296 // block 0 is overreplicated, but the second and third
297 // replicas are the same replica according to DeviceID
298 // (despite different Mtimes). Don't trash the third replica.
301 desired: map[string]int{"default": 2},
302 current: slots{0, 1, 9}})
303 // block 0 is overreplicated; the third and fifth replicas are
304 // extra, but the fourth is another view of the second and
305 // shouldn't be trashed.
308 desired: map[string]int{"default": 2},
309 current: slots{0, 1, 5, 9, 12},
310 shouldTrash: slots{5, 12}})
313 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
314 // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
315 // probe order. For these tests we give it two mounts, one
316 // with classes=[special], one with
317 // classes=[special,special2].
318 bal.srvs[9].mounts = []*KeepMount{{
319 KeepMount: arvados.KeepMount{
321 StorageClasses: []string{"special"},
322 UUID: "zzzzz-mount-special00000009",
323 DeviceID: "9-special",
325 KeepService: bal.srvs[9],
327 KeepMount: arvados.KeepMount{
329 StorageClasses: []string{"special", "special2"},
330 UUID: "zzzzz-mount-special20000009",
331 DeviceID: "9-special-and-special2",
333 KeepService: bal.srvs[9],
335 // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
336 // probe order. We give it two mounts, one with
337 // classes=[special2], one with classes=[default].
338 bal.srvs[13].mounts = []*KeepMount{{
339 KeepMount: arvados.KeepMount{
341 StorageClasses: []string{"special2"},
342 UUID: "zzzzz-mount-special2000000d",
343 DeviceID: "13-special2",
345 KeepService: bal.srvs[13],
347 KeepMount: arvados.KeepMount{
349 StorageClasses: []string{"default"},
350 UUID: "zzzzz-mount-00000000000000d",
351 DeviceID: "13-default",
353 KeepService: bal.srvs[13],
355 // Pull to slot 9 because that's the only server with the
356 // desired class "special".
359 desired: map[string]int{"default": 2, "special": 1},
360 current: slots{0, 1},
361 shouldPull: slots{9},
362 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
363 // If some storage classes are not satisfied, don't trash any
364 // excess replicas. (E.g., if someone desires repl=1 on
365 // class=durable, and we have two copies on class=volatile, we
366 // should wait for pull to succeed before trashing anything).
369 desired: map[string]int{"special": 1},
370 current: slots{0, 1},
371 shouldPull: slots{9},
372 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
373 // Once storage classes are satisfied, trash excess replicas
374 // that appear earlier in probe order but aren't needed to
375 // satisfy the desired classes.
378 desired: map[string]int{"special": 1},
379 current: slots{0, 1, 9},
380 shouldTrash: slots{0, 1}})
381 // Pull to slot 5, the best server with class "special2".
384 desired: map[string]int{"special2": 1},
385 current: slots{0, 1},
386 shouldPull: slots{5},
387 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
388 // Pull to slot 5 and 9 to get replication 2 in desired class
392 desired: map[string]int{"special2": 2},
393 current: slots{0, 1},
394 shouldPull: slots{5, 9},
395 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
396 // Slot 0 has a replica in "default", slot 1 has a replica
397 // in "special"; we need another replica in "default", i.e.,
401 desired: map[string]int{"default": 2, "special": 1},
402 current: slots{0, 1},
403 shouldPull: slots{2}})
404 // Pull to best probe position 0 (despite wrong storage class)
405 // if it's impossible to achieve desired replication in the
406 // desired class (only slots 1 and 3 have special2).
409 desired: map[string]int{"special2": 3},
411 shouldPull: slots{0, 1}})
412 // Trash excess replica.
415 desired: map[string]int{"special": 1},
416 current: slots{0, 1},
417 shouldTrash: slots{1}})
418 // Leave one copy on slot 1 because slot 0 (server 9) only
422 desired: map[string]int{"special": 2},
423 current: slots{0, 1}})
426 // Clear all servers' changesets, balance a single block, and verify
427 // the appropriate changes for that block have been added to the
429 func (bal *balancerSuite) try(c *check.C, t tester) {
430 bal.setupLookupTables()
432 Replicas: bal.replList(t.known, t.current),
435 for i, t := range t.timestamps {
436 blk.Replicas[i].Mtime = t
438 for _, srv := range bal.srvs {
439 srv.ChangeSet = &ChangeSet{}
441 bal.balanceBlock(knownBlkid(t.known), blk)
443 var didPull, didTrash slots
444 var didPullMounts, didTrashMounts []string
445 for i, srv := range bal.srvs {
447 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
452 for _, pull := range srv.Pulls {
453 didPull = append(didPull, slot)
454 didPullMounts = append(didPullMounts, pull.To.UUID)
455 c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
457 for _, trash := range srv.Trashes {
458 didTrash = append(didTrash, slot)
459 didTrashMounts = append(didTrashMounts, trash.From.UUID)
460 c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
464 for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
465 sort.Sort(sort.IntSlice(list))
467 c.Check(didPull, check.DeepEquals, t.shouldPull)
468 c.Check(didTrash, check.DeepEquals, t.shouldTrash)
469 if t.shouldPullMounts != nil {
470 sort.Strings(didPullMounts)
471 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
473 if t.shouldTrashMounts != nil {
474 sort.Strings(didTrashMounts)
475 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
479 // srvList returns the KeepServices, sorted in rendezvous order and
480 // then selected by order. For example, srvList(3, slots{0, 1, 4})
481 // returns the first-, second-, and fifth-best servers for storing
482 // knownBlkid(3).
483 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
484 for _, i := range order {
485 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
490 // replList is like srvList but returns an "existing replicas" slice,
491 // suitable for a BlockState test fixture.
492 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
493 nextMnt := map[*KeepService]int{}
494 mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
495 for _, srv := range bal.srvList(knownBlockID, order) {
496 // round-robin repls onto each srv's mounts
498 nextMnt[srv] = (n + 1) % len(srv.mounts)
500 repls = append(repls, Replica{srv.mounts[n], mtime})
506 // knownBlkid returns the block ID for known block i, using the same
507 // data hashes that are tested in sdk/go/keepclient/root_sorter_test.go.
508 func knownBlkid(i int) arvados.SizedDigest {
509 return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))