Merge branch '11907-acr-stable-pdh' refs #11907
[arvados.git] / services / keep-balance / balance_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "crypto/md5"
9         "fmt"
10         "sort"
11         "strconv"
12         "testing"
13         "time"
14
15         "git.curoverse.com/arvados.git/sdk/go/arvados"
16
17         check "gopkg.in/check.v1"
18 )
19
20 // Test with Gocheck
21 func Test(t *testing.T) {
22         check.TestingT(t)
23 }
24
25 var _ = check.Suite(&balancerSuite{})
26
27 type balancerSuite struct {
28         Balancer
29         srvs            []*KeepService
30         blks            map[string]tester
31         knownRendezvous [][]int
32         signatureTTL    int64
33 }
34
35 const (
36         // index into knownRendezvous
37         known0 = 0
38 )
39
40 type slots []int
41
42 type tester struct {
43         known       int
44         desired     map[string]int
45         current     slots
46         timestamps  []int64
47         shouldPull  slots
48         shouldTrash slots
49
50         shouldPullMounts  []string
51         shouldTrashMounts []string
52
53         expectResult balanceResult
54 }
55
56 func (bal *balancerSuite) SetUpSuite(c *check.C) {
57         bal.knownRendezvous = nil
58         for _, str := range []string{
59                 "3eab2d5fc9681074",
60                 "097dba52e648f1c3",
61                 "c5b4e023f8a7d691",
62                 "9d81c02e76a3bf54",
63         } {
64                 var slots []int
65                 for _, c := range []byte(str) {
66                         pos, _ := strconv.ParseUint(string(c), 16, 4)
67                         slots = append(slots, int(pos))
68                 }
69                 bal.knownRendezvous = append(bal.knownRendezvous, slots)
70         }
71
72         bal.signatureTTL = 3600
73 }
74
75 func (bal *balancerSuite) SetUpTest(c *check.C) {
76         bal.srvs = make([]*KeepService, 16)
77         bal.KeepServices = make(map[string]*KeepService)
78         for i := range bal.srvs {
79                 srv := &KeepService{
80                         KeepService: arvados.KeepService{
81                                 UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i),
82                         },
83                 }
84                 srv.mounts = []*KeepMount{{
85                         KeepMount: arvados.KeepMount{
86                                 UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
87                         },
88                         KeepService: srv,
89                 }}
90                 bal.srvs[i] = srv
91                 bal.KeepServices[srv.UUID] = srv
92         }
93
94         bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
95 }
96
97 func (bal *balancerSuite) TestPerfect(c *check.C) {
98         bal.try(c, tester{
99                 desired:     map[string]int{"default": 2},
100                 current:     slots{0, 1},
101                 shouldPull:  nil,
102                 shouldTrash: nil})
103 }
104
105 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
106         bal.try(c, tester{
107                 desired:     map[string]int{"default": 2},
108                 current:     slots{0, 2, 1},
109                 shouldTrash: slots{2}})
110 }
111
112 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
113         bal.try(c, tester{
114                 desired:     map[string]int{"default": 0},
115                 current:     slots{0, 1, 3},
116                 shouldTrash: slots{0, 1, 3}})
117 }
118
119 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
120         bal.try(c, tester{
121                 desired:    map[string]int{"default": 4},
122                 current:    slots{0, 1},
123                 shouldPull: slots{2, 3}})
124 }
125
126 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
127         bal.srvList(0, slots{3})[0].ReadOnly = true
128         bal.try(c, tester{
129                 desired:    map[string]int{"default": 4},
130                 current:    slots{0, 1},
131                 shouldPull: slots{2, 4}})
132 }
133
134 func (bal *balancerSuite) TestFixUnbalanced(c *check.C) {
135         bal.try(c, tester{
136                 desired:    map[string]int{"default": 2},
137                 current:    slots{2, 0},
138                 shouldPull: slots{1}})
139         bal.try(c, tester{
140                 desired:    map[string]int{"default": 2},
141                 current:    slots{2, 7},
142                 shouldPull: slots{0, 1}})
143         // if only one of the pulls succeeds, we'll see this next:
144         bal.try(c, tester{
145                 desired:     map[string]int{"default": 2},
146                 current:     slots{2, 1, 7},
147                 shouldPull:  slots{0},
148                 shouldTrash: slots{7}})
149         // if both pulls succeed, we'll see this next:
150         bal.try(c, tester{
151                 desired:     map[string]int{"default": 2},
152                 current:     slots{2, 0, 1, 7},
153                 shouldTrash: slots{2, 7}})
154
155         // unbalanced + excessive replication => pull + trash
156         bal.try(c, tester{
157                 desired:     map[string]int{"default": 2},
158                 current:     slots{2, 5, 7},
159                 shouldPull:  slots{0, 1},
160                 shouldTrash: slots{7}})
161 }
162
163 func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) {
164         for _, srv := range bal.srvs {
165                 for i := 0; i < 3; i++ {
166                         m := *(srv.mounts[0])
167                         srv.mounts = append(srv.mounts, &m)
168                 }
169         }
170         bal.try(c, tester{
171                 desired:    map[string]int{"default": 2},
172                 current:    slots{0, 0},
173                 shouldPull: slots{1}})
174         bal.try(c, tester{
175                 desired:    map[string]int{"default": 2},
176                 current:    slots{2, 2},
177                 shouldPull: slots{0, 1}})
178         bal.try(c, tester{
179                 desired:     map[string]int{"default": 2},
180                 current:     slots{0, 0, 1},
181                 shouldTrash: slots{0}})
182         bal.try(c, tester{
183                 desired:     map[string]int{"default": 2},
184                 current:     slots{1, 1, 0},
185                 shouldTrash: slots{1}})
186         bal.try(c, tester{
187                 desired:     map[string]int{"default": 2},
188                 current:     slots{1, 0, 1, 0, 2},
189                 shouldTrash: slots{0, 1, 2}})
190         bal.try(c, tester{
191                 desired:     map[string]int{"default": 2},
192                 current:     slots{1, 1, 1, 0, 2},
193                 shouldTrash: slots{1, 1, 2}})
194         bal.try(c, tester{
195                 desired:     map[string]int{"default": 2},
196                 current:     slots{1, 1, 2},
197                 shouldPull:  slots{0},
198                 shouldTrash: slots{1}})
199         bal.try(c, tester{
200                 desired:     map[string]int{"default": 2},
201                 current:     slots{1, 1, 0},
202                 timestamps:  []int64{12345678, 12345678, 12345679},
203                 shouldTrash: nil})
204         bal.try(c, tester{
205                 desired:    map[string]int{"default": 2},
206                 current:    slots{1, 1},
207                 shouldPull: slots{0}})
208 }
209
210 func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) {
211         // For purposes of increasing replication, we assume identical
212         // replicas are distinct.
213         bal.try(c, tester{
214                 desired:    map[string]int{"default": 4},
215                 current:    slots{0, 1},
216                 timestamps: []int64{12345678, 12345678},
217                 shouldPull: slots{2, 3}})
218 }
219
220 func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
221         // For purposes of decreasing replication, we assume identical
222         // replicas are NOT distinct.
223         bal.try(c, tester{
224                 desired:    map[string]int{"default": 2},
225                 current:    slots{0, 1, 2},
226                 timestamps: []int64{12345678, 12345678, 12345678}})
227         bal.try(c, tester{
228                 desired:    map[string]int{"default": 2},
229                 current:    slots{0, 1, 2},
230                 timestamps: []int64{12345678, 10000000, 10000000}})
231 }
232
233 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
234         oldTime := bal.MinMtime - 3600
235         newTime := bal.MinMtime + 3600
236         // The excess replica is too new to delete.
237         bal.try(c, tester{
238                 desired:    map[string]int{"default": 2},
239                 current:    slots{0, 1, 2},
240                 timestamps: []int64{oldTime, newTime, newTime + 1}})
241         // The best replicas are too new to delete, but the excess
242         // replica is old enough.
243         bal.try(c, tester{
244                 desired:     map[string]int{"default": 2},
245                 current:     slots{0, 1, 2},
246                 timestamps:  []int64{newTime, newTime + 1, oldTime},
247                 shouldTrash: slots{2}})
248 }
249
250 func (bal *balancerSuite) TestDedupDevices(c *check.C) {
251         bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
252         bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
253         bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
254         c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
255         bal.dedupDevices()
256         c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
257         bal.try(c, tester{
258                 known:      0,
259                 desired:    map[string]int{"default": 2},
260                 current:    slots{1},
261                 shouldPull: slots{2}})
262 }
263
264 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
265         bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
266         bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
267         bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
268         // block 0 belongs on servers 3 and e, which have different
269         // device IDs.
270         bal.try(c, tester{
271                 known:      0,
272                 desired:    map[string]int{"default": 2},
273                 current:    slots{1},
274                 shouldPull: slots{0}})
275         // block 1 belongs on servers 0 and 9, which both report
276         // having a replica, but the replicas are on the same device
277         // ID -- so we should pull to the third position (7).
278         bal.try(c, tester{
279                 known:      1,
280                 desired:    map[string]int{"default": 2},
281                 current:    slots{0, 1},
282                 shouldPull: slots{2}})
283         // block 1 can be pulled to the doubly-mounted device, but the
284         // pull should only be done on the first of the two servers.
285         bal.try(c, tester{
286                 known:      1,
287                 desired:    map[string]int{"default": 2},
288                 current:    slots{2},
289                 shouldPull: slots{0}})
290         // block 0 has one replica on a single device mounted on two
291         // servers (e,9 at positions 1,9). Trashing the replica on 9
292         // would lose the block.
293         bal.try(c, tester{
294                 known:      0,
295                 desired:    map[string]int{"default": 2},
296                 current:    slots{1, 9},
297                 shouldPull: slots{0},
298                 expectResult: balanceResult{
299                         have: 1,
300                         classState: map[string]balancedBlockState{"default": {
301                                 desired:      2,
302                                 surplus:      -1,
303                                 unachievable: false}}}})
304         // block 0 is overreplicated, but the second and third
305         // replicas are the same replica according to DeviceID
306         // (despite different Mtimes). Don't trash the third replica.
307         bal.try(c, tester{
308                 known:   0,
309                 desired: map[string]int{"default": 2},
310                 current: slots{0, 1, 9},
311                 expectResult: balanceResult{
312                         have: 2,
313                         classState: map[string]balancedBlockState{"default": {
314                                 desired:      2,
315                                 surplus:      0,
316                                 unachievable: false}}}})
317         // block 0 is overreplicated; the third and fifth replicas are
318         // extra, but the fourth is another view of the second and
319         // shouldn't be trashed.
320         bal.try(c, tester{
321                 known:       0,
322                 desired:     map[string]int{"default": 2},
323                 current:     slots{0, 1, 5, 9, 12},
324                 shouldTrash: slots{5, 12},
325                 expectResult: balanceResult{
326                         have: 4,
327                         classState: map[string]balancedBlockState{"default": {
328                                 desired:      2,
329                                 surplus:      2,
330                                 unachievable: false}}}})
331 }
332
333 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
334         // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
335         // probe order. For these tests we give it two mounts, one
336         // with classes=[special], one with
337         // classes=[special,special2].
338         bal.srvs[9].mounts = []*KeepMount{{
339                 KeepMount: arvados.KeepMount{
340                         Replication:    1,
341                         StorageClasses: []string{"special"},
342                         UUID:           "zzzzz-mount-special00000009",
343                         DeviceID:       "9-special",
344                 },
345                 KeepService: bal.srvs[9],
346         }, {
347                 KeepMount: arvados.KeepMount{
348                         Replication:    1,
349                         StorageClasses: []string{"special", "special2"},
350                         UUID:           "zzzzz-mount-special20000009",
351                         DeviceID:       "9-special-and-special2",
352                 },
353                 KeepService: bal.srvs[9],
354         }}
355         // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in
356         // probe order. We give it two mounts, one with
357         // classes=[special3], one with classes=[default].
358         bal.srvs[13].mounts = []*KeepMount{{
359                 KeepMount: arvados.KeepMount{
360                         Replication:    1,
361                         StorageClasses: []string{"special2"},
362                         UUID:           "zzzzz-mount-special2000000d",
363                         DeviceID:       "13-special2",
364                 },
365                 KeepService: bal.srvs[13],
366         }, {
367                 KeepMount: arvados.KeepMount{
368                         Replication:    1,
369                         StorageClasses: []string{"default"},
370                         UUID:           "zzzzz-mount-00000000000000d",
371                         DeviceID:       "13-default",
372                 },
373                 KeepService: bal.srvs[13],
374         }}
375         // Pull to slot 9 because that's the only server with the
376         // desired class "special".
377         bal.try(c, tester{
378                 known:            0,
379                 desired:          map[string]int{"default": 2, "special": 1},
380                 current:          slots{0, 1},
381                 shouldPull:       slots{9},
382                 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
383         // If some storage classes are not satisfied, don't trash any
384         // excess replicas. (E.g., if someone desires repl=1 on
385         // class=durable, and we have two copies on class=volatile, we
386         // should wait for pull to succeed before trashing anything).
387         bal.try(c, tester{
388                 known:            0,
389                 desired:          map[string]int{"special": 1},
390                 current:          slots{0, 1},
391                 shouldPull:       slots{9},
392                 shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
393         // Once storage classes are satisfied, trash excess replicas
394         // that appear earlier in probe order but aren't needed to
395         // satisfy the desired classes.
396         bal.try(c, tester{
397                 known:       0,
398                 desired:     map[string]int{"special": 1},
399                 current:     slots{0, 1, 9},
400                 shouldTrash: slots{0, 1}})
401         // Pull to slot 5, the best server with class "special2".
402         bal.try(c, tester{
403                 known:            0,
404                 desired:          map[string]int{"special2": 1},
405                 current:          slots{0, 1},
406                 shouldPull:       slots{5},
407                 shouldPullMounts: []string{"zzzzz-mount-special2000000d"}})
408         // Pull to slot 5 and 9 to get replication 2 in desired class
409         // "special2".
410         bal.try(c, tester{
411                 known:            0,
412                 desired:          map[string]int{"special2": 2},
413                 current:          slots{0, 1},
414                 shouldPull:       slots{5, 9},
415                 shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}})
416         // Slot 0 has a replica in "default", slot 1 has a replica
417         // in "special"; we need another replica in "default", i.e.,
418         // on slot 2.
419         bal.try(c, tester{
420                 known:      1,
421                 desired:    map[string]int{"default": 2, "special": 1},
422                 current:    slots{0, 1},
423                 shouldPull: slots{2}})
424         // Pull to best probe position 0 (despite wrong storage class)
425         // if it's impossible to achieve desired replication in the
426         // desired class (only slots 1 and 3 have special2).
427         bal.try(c, tester{
428                 known:      1,
429                 desired:    map[string]int{"special2": 3},
430                 current:    slots{3},
431                 shouldPull: slots{0, 1}})
432         // Trash excess replica.
433         bal.try(c, tester{
434                 known:       3,
435                 desired:     map[string]int{"special": 1},
436                 current:     slots{0, 1},
437                 shouldTrash: slots{1}})
438         // Leave one copy on slot 1 because slot 0 (server 9) only
439         // gives us repl=1.
440         bal.try(c, tester{
441                 known:   3,
442                 desired: map[string]int{"special": 2},
443                 current: slots{0, 1}})
444 }
445
446 // Clear all servers' changesets, balance a single block, and verify
447 // the appropriate changes for that block have been added to the
448 // changesets.
449 func (bal *balancerSuite) try(c *check.C, t tester) {
450         bal.setupLookupTables()
451         blk := &BlockState{
452                 Replicas: bal.replList(t.known, t.current),
453                 Desired:  t.desired,
454         }
455         for i, t := range t.timestamps {
456                 blk.Replicas[i].Mtime = t
457         }
458         for _, srv := range bal.srvs {
459                 srv.ChangeSet = &ChangeSet{}
460         }
461         result := bal.balanceBlock(knownBlkid(t.known), blk)
462
463         var didPull, didTrash slots
464         var didPullMounts, didTrashMounts []string
465         for i, srv := range bal.srvs {
466                 var slot int
467                 for probeOrder, srvNum := range bal.knownRendezvous[t.known] {
468                         if srvNum == i {
469                                 slot = probeOrder
470                         }
471                 }
472                 for _, pull := range srv.Pulls {
473                         didPull = append(didPull, slot)
474                         didPullMounts = append(didPullMounts, pull.To.UUID)
475                         c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known))
476                 }
477                 for _, trash := range srv.Trashes {
478                         didTrash = append(didTrash, slot)
479                         didTrashMounts = append(didTrashMounts, trash.From.UUID)
480                         c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known))
481                 }
482         }
483
484         for _, list := range []slots{didPull, didTrash, t.shouldPull, t.shouldTrash} {
485                 sort.Sort(sort.IntSlice(list))
486         }
487         c.Check(didPull, check.DeepEquals, t.shouldPull)
488         c.Check(didTrash, check.DeepEquals, t.shouldTrash)
489         if t.shouldPullMounts != nil {
490                 sort.Strings(didPullMounts)
491                 c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts)
492         }
493         if t.shouldTrashMounts != nil {
494                 sort.Strings(didTrashMounts)
495                 c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
496         }
497         if t.expectResult.have > 0 {
498                 c.Check(result.have, check.Equals, t.expectResult.have)
499         }
500         if t.expectResult.classState != nil {
501                 c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
502         }
503 }
504
505 // srvList returns the KeepServices, sorted in rendezvous order and
506 // then selected by idx. For example, srvList(3, slots{0, 1, 4})
507 // returns the the first-, second-, and fifth-best servers for storing
508 // bal.knownBlkid(3).
509 func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) {
510         for _, i := range order {
511                 srvs = append(srvs, bal.srvs[bal.knownRendezvous[knownBlockID][i]])
512         }
513         return
514 }
515
516 // replList is like srvList but returns an "existing replicas" slice,
517 // suitable for a BlockState test fixture.
518 func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) {
519         nextMnt := map[*KeepService]int{}
520         mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9
521         for _, srv := range bal.srvList(knownBlockID, order) {
522                 // round-robin repls onto each srv's mounts
523                 n := nextMnt[srv]
524                 nextMnt[srv] = (n + 1) % len(srv.mounts)
525
526                 repls = append(repls, Replica{srv.mounts[n], mtime})
527                 mtime++
528         }
529         return
530 }
531
532 // generate the same data hashes that are tested in
533 // sdk/go/keepclient/root_sorter_test.go
534 func knownBlkid(i int) arvados.SizedDigest {
535         return arvados.SizedDigest(fmt.Sprintf("%x+64", md5.Sum([]byte(fmt.Sprintf("%064x", i)))))
536 }