X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/8e31910034627dedd7259dd1e45a60768108c1e1..c9b029a3b2efaf6babe458cfaa70b08a3debd4cc:/services/keep-balance/balance_test.go diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go index 8650de141c..e5bdf9c023 100644 --- a/services/keep-balance/balance_test.go +++ b/services/keep-balance/balance_test.go @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepbalance import ( "crypto/md5" @@ -12,8 +12,8 @@ import ( "testing" "time" - "git.curoverse.com/arvados.git/sdk/go/arvados" - + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" check "gopkg.in/check.v1" ) @@ -50,7 +50,8 @@ type tester struct { shouldPullMounts []string shouldTrashMounts []string - expectResult balanceResult + expectBlockState *balancedBlockState + expectClassState map[string]balancedBlockState } func (bal *balancerSuite) SetUpSuite(c *check.C) { @@ -70,6 +71,7 @@ func (bal *balancerSuite) SetUpSuite(c *check.C) { } bal.signatureTTL = 3600 + bal.Logger = ctxlog.TestLogger(c) } func (bal *balancerSuite) SetUpTest(c *check.C) { @@ -83,7 +85,10 @@ func (bal *balancerSuite) SetUpTest(c *check.C) { } srv.mounts = []*KeepMount{{ KeepMount: arvados.KeepMount{ - UUID: fmt.Sprintf("zzzzz-mount-%015x", i), + UUID: fmt.Sprintf("zzzzz-mount-%015x", i), + StorageClasses: map[string]bool{"default": true}, + AllowWrite: true, + AllowTrash: true, }, KeepService: srv, }} @@ -92,6 +97,7 @@ func (bal *balancerSuite) SetUpTest(c *check.C) { } bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9 + bal.cleanupMounts() } func (bal *balancerSuite) TestPerfect(c *check.C) { @@ -99,28 +105,42 @@ func (bal *balancerSuite) TestPerfect(c *check.C) { desired: map[string]int{"default": 2}, current: slots{0, 1}, shouldPull: nil, - shouldTrash: nil}) + shouldTrash: nil, + expectBlockState: &balancedBlockState{ + needed: 2, + }}) } func (bal *balancerSuite) TestDecreaseRepl(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 2}, current: slots{0, 2, 1}, - shouldTrash: slots{2}}) + shouldTrash: slots{2}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) } func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 0}, current: slots{0, 1, 3}, - shouldTrash: slots{0, 1, 3}}) + shouldTrash: slots{0, 1, 3}, + expectBlockState: &balancedBlockState{ + unneeded: 3, + }}) } func (bal *balancerSuite) TestIncreaseRepl(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 4}, current: slots{0, 1}, - shouldPull: slots{2, 3}}) + shouldPull: slots{2, 3}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 2, + }}) } func (bal *balancerSuite) TestSkipReadonly(c *check.C) { @@ -128,7 +148,121 @@ func (bal *balancerSuite) TestSkipReadonly(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 4}, current: slots{0, 1}, - shouldPull: slots{2, 4}}) + shouldPull: slots{2, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 2, + }}) +} + +func (bal *balancerSuite) TestAllowTrashWhenReadOnly(c *check.C) { + srvs := bal.srvList(0, slots{3}) + srvs[0].mounts[0].KeepMount.AllowWrite = false + srvs[0].mounts[0].KeepMount.AllowTrash = true + // can't pull to slot 3, so pull to slot 4 instead + bal.try(c, tester{ + desired: map[string]int{"default": 4}, + current: slots{0, 1}, + shouldPull: slots{2, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 2, + }}) + // expect to be able to trash slot 3 in future, so pull to + // slot 1 + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 3}, + shouldPull: slots{1}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 1, + }}) + // trash excess from slot 3 + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 3}, + shouldTrash: slots{3}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) +} + +func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) { + bal.testMultipleViews(c, false, false) +} + +func (bal *balancerSuite) TestMultipleViewsReadOnlyAllowTrash(c *check.C) { + bal.testMultipleViews(c, false, true) +} + +func (bal *balancerSuite) TestMultipleViews(c *check.C) { + bal.testMultipleViews(c, true, true) +} + +func (bal *balancerSuite) testMultipleViews(c *check.C, allowWrite, allowTrash bool) { + for i, srv := range bal.srvs { + // Add a mount to each service + srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i) + srv.mounts = append(srv.mounts, &KeepMount{ + KeepMount: arvados.KeepMount{ + DeviceID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID, + UUID: bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID, + AllowWrite: allowWrite, + AllowTrash: allowTrash, + Replication: 1, + StorageClasses: map[string]bool{"default": true}, + }, + KeepService: srv, + }) + } + for i := 1; i < len(bal.srvs); i++ { + c.Logf("i=%d", i) + if i == 4 { + // Timestamps are all different, but one of + // the mounts on srv[4] has the same device ID + // where the non-deletable replica is stored + // on srv[3], so only one replica is safe to + // trash. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{i}}) + } else if !allowTrash { + // Timestamps are all different, and the third + // replica can't be trashed because it's on a + // read-only mount (with + // AllowTrashWhenReadOnly=false), so the first + // two replicas should be trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{0, i}}) + } else { + // Timestamps are all different, so both + // replicas on the non-optimal server should + // be trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{i, i}}) + } + // If the three replicas have identical timestamps, + // none of them can be trashed safely. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + timestamps: []int64{12345678, 12345678, 12345678}}) + // If the first and third replicas have identical + // timestamps, only the second replica should be + // trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + timestamps: []int64{12345678, 12345679, 12345678}, + shouldTrash: slots{i}}) + } } func (bal *balancerSuite) TestFixUnbalanced(c *check.C) { @@ -161,9 +295,10 @@ func (bal *balancerSuite) TestFixUnbalanced(c *check.C) { } func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) { - for _, srv := range bal.srvs { + for s, srv := range bal.srvs { for i := 0; i < 3; i++ { m := *(srv.mounts[0]) + m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i) srv.mounts = append(srv.mounts, &m) } } @@ -228,6 +363,35 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) { desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{12345678, 10000000, 10000000}}) + bal.try(c, tester{ + desired: map[string]int{"default": 0}, + current: slots{0, 1, 2}, + timestamps: []int64{12345678, 12345678, 12345678}, + shouldTrash: slots{0}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 10000000, 10000000, 10000000}, + shouldTrash: slots{2}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 12345671, 10000000, 10000000}, + shouldTrash: slots{2, 5}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID, + bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 12345679, 10000000, 10000000}, + shouldTrash: slots{5}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}}) } func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { @@ -237,7 +401,11 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, - timestamps: []int64{oldTime, newTime, newTime + 1}}) + timestamps: []int64{oldTime, newTime, newTime + 1}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) // The best replicas are too new to delete, but the excess // replica is old enough. bal.try(c, tester{ @@ -247,12 +415,13 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { shouldTrash: slots{2}}) } -func (bal *balancerSuite) TestDedupDevices(c *check.C) { - bal.srvs[3].mounts[0].KeepMount.ReadOnly = true +func (bal *balancerSuite) TestCleanupMounts(c *check.C) { + bal.srvs[3].mounts[0].KeepMount.AllowWrite = false bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef" + bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef" c.Check(len(bal.srvs[3].mounts), check.Equals, 1) - bal.dedupDevices() + bal.cleanupMounts() c.Check(len(bal.srvs[3].mounts), check.Equals, 0) bal.try(c, tester{ known: 0, @@ -261,33 +430,159 @@ func (bal *balancerSuite) TestDedupDevices(c *check.C) { shouldPull: slots{2}}) } +func (bal *balancerSuite) TestVolumeReplication(c *check.C) { + bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0 + bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e + bal.cleanupMounts() + // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2 + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{1}, + shouldPull: slots{0}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1}, + shouldPull: nil, + expectBlockState: &balancedBlockState{ + needed: 2, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2}, + shouldTrash: slots{2}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 3}, + current: slots{0, 2, 3, 4}, + shouldPull: slots{1}, + shouldTrash: slots{4}, + expectBlockState: &balancedBlockState{ + needed: 3, + unneeded: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 3}, + current: slots{0, 1, 2, 3, 4}, + shouldTrash: slots{2, 3, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 3, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 4}, + current: slots{0, 1, 2, 3, 4}, + shouldTrash: slots{3, 4}, + expectBlockState: &balancedBlockState{ + needed: 3, + unneeded: 2, + }}) + // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2 + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{0}, + expectBlockState: &balancedBlockState{ + needed: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 3}, + current: slots{0}, + shouldPull: slots{1}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{0}, + shouldPull: slots{1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 2, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{2}, + shouldPull: slots{0, 1}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 2, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{7}, + shouldPull: slots{0, 1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 3, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{1, 2, 3, 4}, + shouldPull: slots{0}, + shouldTrash: slots{3, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 2, + pulling: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2}, + shouldTrash: slots{1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + unneeded: 2, + }}) +} + func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) { - bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef" - bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef" - bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef" + dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID + bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID + bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID // block 0 belongs on servers 3 and e, which have different - // device IDs. + // UUIDs. bal.try(c, tester{ known: 0, desired: map[string]int{"default": 2}, current: slots{1}, shouldPull: slots{0}}) // block 1 belongs on servers 0 and 9, which both report - // having a replica, but the replicas are on the same device - // ID -- so we should pull to the third position (7). + // having a replica, but the replicas are on the same volume + // -- so we should pull to the third position (7). bal.try(c, tester{ known: 1, desired: map[string]int{"default": 2}, current: slots{0, 1}, shouldPull: slots{2}}) - // block 1 can be pulled to the doubly-mounted device, but the + // block 1 can be pulled to the doubly-mounted volume, but the // pull should only be done on the first of the two servers. bal.try(c, tester{ known: 1, desired: map[string]int{"default": 2}, current: slots{2}, shouldPull: slots{0}}) - // block 0 has one replica on a single device mounted on two + // block 0 has one replica on a single volume mounted on two // servers (e,9 at positions 1,9). Trashing the replica on 9 // would lose the block. bal.try(c, tester{ @@ -295,25 +590,20 @@ func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) { desired: map[string]int{"default": 2}, current: slots{1, 9}, shouldPull: slots{0}, - expectResult: balanceResult{ - have: 1, - classState: map[string]balancedBlockState{"default": { - desired: 2, - surplus: -1, - unachievable: false}}}}) + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) // block 0 is overreplicated, but the second and third - // replicas are the same replica according to DeviceID + // replicas are the same replica according to volume UUID // (despite different Mtimes). Don't trash the third replica. bal.try(c, tester{ known: 0, desired: map[string]int{"default": 2}, current: slots{0, 1, 9}, - expectResult: balanceResult{ - have: 2, - classState: map[string]balancedBlockState{"default": { - desired: 2, - surplus: 0, - unachievable: false}}}}) + expectBlockState: &balancedBlockState{ + needed: 2, + }}) // block 0 is overreplicated; the third and fifth replicas are // extra, but the fourth is another view of the second and // shouldn't be trashed. @@ -322,12 +612,10 @@ func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) { desired: map[string]int{"default": 2}, current: slots{0, 1, 5, 9, 12}, shouldTrash: slots{5, 12}, - expectResult: balanceResult{ - have: 4, - classState: map[string]balancedBlockState{"default": { - desired: 2, - surplus: 2, - unachievable: false}}}}) + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 2, + }}) } func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { @@ -337,16 +625,20 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { // classes=[special,special2]. bal.srvs[9].mounts = []*KeepMount{{ KeepMount: arvados.KeepMount{ + AllowWrite: true, + AllowTrash: true, Replication: 1, - StorageClasses: []string{"special"}, + StorageClasses: map[string]bool{"special": true}, UUID: "zzzzz-mount-special00000009", DeviceID: "9-special", }, KeepService: bal.srvs[9], }, { KeepMount: arvados.KeepMount{ + AllowWrite: true, + AllowTrash: true, Replication: 1, - StorageClasses: []string{"special", "special2"}, + StorageClasses: map[string]bool{"special": true, "special2": true}, UUID: "zzzzz-mount-special20000009", DeviceID: "9-special-and-special2", }, @@ -357,16 +649,20 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { // classes=[special3], one with classes=[default]. bal.srvs[13].mounts = []*KeepMount{{ KeepMount: arvados.KeepMount{ + AllowWrite: true, + AllowTrash: true, Replication: 1, - StorageClasses: []string{"special2"}, + StorageClasses: map[string]bool{"special2": true}, UUID: "zzzzz-mount-special2000000d", DeviceID: "13-special2", }, KeepService: bal.srvs[13], }, { KeepMount: arvados.KeepMount{ + AllowWrite: true, + AllowTrash: true, Replication: 1, - StorageClasses: []string{"default"}, + StorageClasses: map[string]bool{"default": true}, UUID: "zzzzz-mount-00000000000000d", DeviceID: "13-default", }, @@ -379,7 +675,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { desired: map[string]int{"default": 2, "special": 1}, current: slots{0, 1}, shouldPull: slots{9}, - shouldPullMounts: []string{"zzzzz-mount-special00000009"}}) + shouldPullMounts: []string{"zzzzz-mount-special20000009"}}) // If some storage classes are not satisfied, don't trash any // excess replicas. (E.g., if someone desires repl=1 on // class=durable, and we have two copies on class=volatile, we @@ -389,7 +685,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { desired: map[string]int{"special": 1}, current: slots{0, 1}, shouldPull: slots{9}, - shouldPullMounts: []string{"zzzzz-mount-special00000009"}}) + shouldPullMounts: []string{"zzzzz-mount-special20000009"}}) // Once storage classes are satisfied, trash excess replicas // that appear earlier in probe order but aren't needed to // satisfy the desired classes. @@ -494,17 +790,17 @@ func (bal *balancerSuite) try(c *check.C, t tester) { sort.Strings(didTrashMounts) c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts) } - if t.expectResult.have > 0 { - c.Check(result.have, check.Equals, t.expectResult.have) + if t.expectBlockState != nil { + c.Check(result.blockState, check.Equals, *t.expectBlockState) } - if t.expectResult.classState != nil { - c.Check(result.classState, check.DeepEquals, t.expectResult.classState) + if t.expectClassState != nil { + c.Check(result.classState, check.DeepEquals, t.expectClassState) } } // srvList returns the KeepServices, sorted in rendezvous order and // then selected by idx. For example, srvList(3, slots{0, 1, 4}) -// returns the the first-, second-, and fifth-best servers for storing +// returns the first-, second-, and fifth-best servers for storing // bal.knownBlkid(3). func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) { for _, i := range order {