X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/932e3d6e9a899cc662ea3934b79057d39cd88fed..1e13d4ecffd4d335eb7918a7b9bd992f3f192cd1:/services/keep-balance/balance_test.go diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go index cfdd47fc91..5bc66dbf3f 100644 --- a/services/keep-balance/balance_test.go +++ b/services/keep-balance/balance_test.go @@ -12,8 +12,8 @@ import ( "testing" "time" - "git.curoverse.com/arvados.git/sdk/go/arvados" - + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" check "gopkg.in/check.v1" ) @@ -49,6 +49,9 @@ type tester struct { shouldPullMounts []string shouldTrashMounts []string + + expectBlockState *balancedBlockState + expectClassState map[string]balancedBlockState } func (bal *balancerSuite) SetUpSuite(c *check.C) { @@ -68,6 +71,7 @@ func (bal *balancerSuite) SetUpSuite(c *check.C) { } bal.signatureTTL = 3600 + bal.Logger = ctxlog.TestLogger(c) } func (bal *balancerSuite) SetUpTest(c *check.C) { @@ -90,6 +94,7 @@ func (bal *balancerSuite) SetUpTest(c *check.C) { } bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9 + bal.cleanupMounts() } func (bal *balancerSuite) TestPerfect(c *check.C) { @@ -97,28 +102,42 @@ func (bal *balancerSuite) TestPerfect(c *check.C) { desired: map[string]int{"default": 2}, current: slots{0, 1}, shouldPull: nil, - shouldTrash: nil}) + shouldTrash: nil, + expectBlockState: &balancedBlockState{ + needed: 2, + }}) } func (bal *balancerSuite) TestDecreaseRepl(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 2}, current: slots{0, 2, 1}, - shouldTrash: slots{2}}) + shouldTrash: slots{2}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) } func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 0}, current: slots{0, 1, 3}, - shouldTrash: slots{0, 1, 3}}) + shouldTrash: slots{0, 1, 3}, + expectBlockState: &balancedBlockState{ + unneeded: 3, + }}) } func (bal *balancerSuite) TestIncreaseRepl(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 4}, current: slots{0, 1}, - shouldPull: slots{2, 3}}) + shouldPull: slots{2, 3}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 2, + }}) } func (bal *balancerSuite) TestSkipReadonly(c *check.C) { @@ -126,7 +145,80 @@ func (bal *balancerSuite) TestSkipReadonly(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 4}, current: slots{0, 1}, - shouldPull: slots{2, 4}}) + shouldPull: slots{2, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + pulling: 2, + }}) +} + +func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) { + bal.testMultipleViews(c, true) +} + +func (bal *balancerSuite) TestMultipleViews(c *check.C) { + bal.testMultipleViews(c, false) +} + +func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) { + for i, srv := range bal.srvs { + // Add a mount to each service + srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i) + srv.mounts = append(srv.mounts, &KeepMount{ + KeepMount: arvados.KeepMount{ + DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)), + UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16), + ReadOnly: readonly, + Replication: 1, + }, + KeepService: srv, + }) + } + for i := 1; i < len(bal.srvs); i++ { + c.Logf("i=%d", i) + if i == 4 { + // Timestamps are all different, but one of + // the mounts on srv[4] has the same device ID + // where the non-deletable replica is stored + // on srv[3], so only one replica is safe to + // trash. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{i}}) + } else if readonly { + // Timestamps are all different, and the third + // replica can't be trashed because it's on a + // read-only mount, so the first two replicas + // should be trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{0, i}}) + } else { + // Timestamps are all different, so both + // replicas on the non-optimal server should + // be trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + shouldTrash: slots{i, i}}) + } + // If the three replicas have identical timestamps, + // none of them can be trashed safely. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + timestamps: []int64{12345678, 12345678, 12345678}}) + // If the first and third replicas have identical + // timestamps, only the second replica should be + // trashed. + bal.try(c, tester{ + desired: map[string]int{"default": 1}, + current: slots{0, i, i}, + timestamps: []int64{12345678, 12345679, 12345678}, + shouldTrash: slots{i}}) + } } func (bal *balancerSuite) TestFixUnbalanced(c *check.C) { @@ -159,9 +251,10 @@ func (bal *balancerSuite) TestFixUnbalanced(c *check.C) { } func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) { - for _, srv := range bal.srvs { + for s, srv := range bal.srvs { for i := 0; i < 3; i++ { m := *(srv.mounts[0]) + m.UUID = fmt.Sprintf("zzzzz-mount-%015x", (s<<10)+i) srv.mounts = append(srv.mounts, &m) } } @@ -235,7 +328,11 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { bal.try(c, tester{ desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, - timestamps: []int64{oldTime, newTime, newTime + 1}}) + timestamps: []int64{oldTime, newTime, newTime + 1}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) // The best replicas are too new to delete, but the excess // replica is old enough. bal.try(c, tester{ @@ -245,6 +342,208 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { shouldTrash: slots{2}}) } +func (bal *balancerSuite) TestCleanupMounts(c *check.C) { + bal.srvs[3].mounts[0].KeepMount.ReadOnly = true + bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef" + bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef" + c.Check(len(bal.srvs[3].mounts), check.Equals, 1) + bal.cleanupMounts() + c.Check(len(bal.srvs[3].mounts), check.Equals, 0) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{1}, + shouldPull: slots{2}}) +} + +func (bal *balancerSuite) TestVolumeReplication(c *check.C) { + bal.srvs[0].mounts[0].KeepMount.Replication = 2 // srv 0 + bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e + bal.cleanupMounts() + // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2 + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{1}, + shouldPull: slots{0}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1}, + shouldPull: nil, + expectBlockState: &balancedBlockState{ + needed: 2, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2}, + shouldTrash: slots{2}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 3}, + current: slots{0, 2, 3, 4}, + shouldPull: slots{1}, + shouldTrash: slots{4}, + expectBlockState: &balancedBlockState{ + needed: 3, + unneeded: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 3}, + current: slots{0, 1, 2, 3, 4}, + shouldTrash: slots{2, 3, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 3, + }}) + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 4}, + current: slots{0, 1, 2, 3, 4}, + shouldTrash: slots{3, 4}, + expectBlockState: &balancedBlockState{ + needed: 3, + unneeded: 2, + }}) + // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2 + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{0}, + expectBlockState: &balancedBlockState{ + needed: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 3}, + current: slots{0}, + shouldPull: slots{1}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{0}, + shouldPull: slots{1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 2, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{2}, + shouldPull: slots{0, 1}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 2, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 4}, + current: slots{7}, + shouldPull: slots{0, 1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 3, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{1, 2, 3, 4}, + shouldPull: slots{0}, + shouldTrash: slots{3, 4}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 2, + pulling: 1, + }}) + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2}, + shouldTrash: slots{1, 2}, + expectBlockState: &balancedBlockState{ + needed: 1, + unneeded: 2, + }}) +} + +func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) { + bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef" + bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef" + bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef" + // block 0 belongs on servers 3 and e, which have different + // device IDs. + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{1}, + shouldPull: slots{0}}) + // block 1 belongs on servers 0 and 9, which both report + // having a replica, but the replicas are on the same device + // ID -- so we should pull to the third position (7). + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{0, 1}, + shouldPull: slots{2}}) + // block 1 can be pulled to the doubly-mounted device, but the + // pull should only be done on the first of the two servers. + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2}, + current: slots{2}, + shouldPull: slots{0}}) + // block 0 has one replica on a single device mounted on two + // servers (e,9 at positions 1,9). Trashing the replica on 9 + // would lose the block. + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{1, 9}, + shouldPull: slots{0}, + expectBlockState: &balancedBlockState{ + needed: 1, + pulling: 1, + }}) + // block 0 is overreplicated, but the second and third + // replicas are the same replica according to DeviceID + // (despite different Mtimes). Don't trash the third replica. + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 9}, + expectBlockState: &balancedBlockState{ + needed: 2, + }}) + // block 0 is overreplicated; the third and fifth replicas are + // extra, but the fourth is another view of the second and + // shouldn't be trashed. + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2}, + current: slots{0, 1, 5, 9, 12}, + shouldTrash: slots{5, 12}, + expectBlockState: &balancedBlockState{ + needed: 2, + unneeded: 2, + }}) +} + func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in // probe order. For these tests we give it two mounts, one @@ -253,7 +552,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { bal.srvs[9].mounts = []*KeepMount{{ KeepMount: arvados.KeepMount{ Replication: 1, - StorageClasses: []string{"special"}, + StorageClasses: map[string]bool{"special": true}, UUID: "zzzzz-mount-special00000009", DeviceID: "9-special", }, @@ -261,7 +560,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { }, { KeepMount: arvados.KeepMount{ Replication: 1, - StorageClasses: []string{"special", "special2"}, + StorageClasses: map[string]bool{"special": true, "special2": true}, UUID: "zzzzz-mount-special20000009", DeviceID: "9-special-and-special2", }, @@ -273,7 +572,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { bal.srvs[13].mounts = []*KeepMount{{ KeepMount: arvados.KeepMount{ Replication: 1, - StorageClasses: []string{"special2"}, + StorageClasses: map[string]bool{"special2": true}, UUID: "zzzzz-mount-special2000000d", DeviceID: "13-special2", }, @@ -281,7 +580,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { }, { KeepMount: arvados.KeepMount{ Replication: 1, - StorageClasses: []string{"default"}, + StorageClasses: map[string]bool{"default": true}, UUID: "zzzzz-mount-00000000000000d", DeviceID: "13-default", }, @@ -373,7 +672,7 @@ func (bal *balancerSuite) try(c *check.C, t tester) { for _, srv := range bal.srvs { srv.ChangeSet = &ChangeSet{} } - bal.balanceBlock(knownBlkid(t.known), blk) + result := bal.balanceBlock(knownBlkid(t.known), blk) var didPull, didTrash slots var didPullMounts, didTrashMounts []string @@ -409,11 +708,17 @@ func (bal *balancerSuite) try(c *check.C, t tester) { sort.Strings(didTrashMounts) c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts) } + if t.expectBlockState != nil { + c.Check(result.blockState, check.Equals, *t.expectBlockState) + } + if t.expectClassState != nil { + c.Check(result.classState, check.DeepEquals, t.expectClassState) + } } // srvList returns the KeepServices, sorted in rendezvous order and // then selected by idx. For example, srvList(3, slots{0, 1, 4}) -// returns the the first-, second-, and fifth-best servers for storing +// returns the first-, second-, and fifth-best servers for storing // bal.knownBlkid(3). func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) { for _, i := range order {