21126: Test keep-balance behavior with AllowTrashWhenReadOnly.
[arvados.git] / services / keep-balance / balance_test.go
index 6cffa8ded4dbad6975225949e871852e5ca2d50e..e5bdf9c023d26fd5e631820488ea28f98c3ed65f 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepbalance
 
 import (
        "crypto/md5"
@@ -12,8 +12,8 @@ import (
        "testing"
        "time"
 
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -50,7 +50,8 @@ type tester struct {
        shouldPullMounts  []string
        shouldTrashMounts []string
 
-       expectResult balanceResult
+       expectBlockState *balancedBlockState
+       expectClassState map[string]balancedBlockState
 }
 
 func (bal *balancerSuite) SetUpSuite(c *check.C) {
@@ -84,7 +85,10 @@ func (bal *balancerSuite) SetUpTest(c *check.C) {
                }
                srv.mounts = []*KeepMount{{
                        KeepMount: arvados.KeepMount{
-                               UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
+                               UUID:           fmt.Sprintf("zzzzz-mount-%015x", i),
+                               StorageClasses: map[string]bool{"default": true},
+                               AllowWrite:     true,
+                               AllowTrash:     true,
                        },
                        KeepService: srv,
                }}
@@ -101,28 +105,42 @@ func (bal *balancerSuite) TestPerfect(c *check.C) {
                desired:     map[string]int{"default": 2},
                current:     slots{0, 1},
                shouldPull:  nil,
-               shouldTrash: nil})
+               shouldTrash: nil,
+               expectBlockState: &balancedBlockState{
+                       needed: 2,
+               }})
 }
 
 func (bal *balancerSuite) TestDecreaseRepl(c *check.C) {
        bal.try(c, tester{
                desired:     map[string]int{"default": 2},
                current:     slots{0, 2, 1},
-               shouldTrash: slots{2}})
+               shouldTrash: slots{2},
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 1,
+               }})
 }
 
 func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) {
        bal.try(c, tester{
                desired:     map[string]int{"default": 0},
                current:     slots{0, 1, 3},
-               shouldTrash: slots{0, 1, 3}})
+               shouldTrash: slots{0, 1, 3},
+               expectBlockState: &balancedBlockState{
+                       unneeded: 3,
+               }})
 }
 
 func (bal *balancerSuite) TestIncreaseRepl(c *check.C) {
        bal.try(c, tester{
                desired:    map[string]int{"default": 4},
                current:    slots{0, 1},
-               shouldPull: slots{2, 3}})
+               shouldPull: slots{2, 3},
+               expectBlockState: &balancedBlockState{
+                       needed:  2,
+                       pulling: 2,
+               }})
 }
 
 func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
@@ -130,27 +148,71 @@ func (bal *balancerSuite) TestSkipReadonly(c *check.C) {
        bal.try(c, tester{
                desired:    map[string]int{"default": 4},
                current:    slots{0, 1},
-               shouldPull: slots{2, 4}})
+               shouldPull: slots{2, 4},
+               expectBlockState: &balancedBlockState{
+                       needed:  2,
+                       pulling: 2,
+               }})
+}
+
+func (bal *balancerSuite) TestAllowTrashWhenReadOnly(c *check.C) {
+       srvs := bal.srvList(0, slots{3})
+       srvs[0].mounts[0].KeepMount.AllowWrite = false
+       srvs[0].mounts[0].KeepMount.AllowTrash = true
+       // can't pull to slot 3, so pull to slot 4 instead
+       bal.try(c, tester{
+               desired:    map[string]int{"default": 4},
+               current:    slots{0, 1},
+               shouldPull: slots{2, 4},
+               expectBlockState: &balancedBlockState{
+                       needed:  2,
+                       pulling: 2,
+               }})
+       // expect to be able to trash slot 3 in future, so pull to
+       // slot 1
+       bal.try(c, tester{
+               desired:    map[string]int{"default": 2},
+               current:    slots{0, 3},
+               shouldPull: slots{1},
+               expectBlockState: &balancedBlockState{
+                       needed:  2,
+                       pulling: 1,
+               }})
+       // trash excess from slot 3
+       bal.try(c, tester{
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 3},
+               shouldTrash: slots{3},
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 1,
+               }})
 }
 
 func (bal *balancerSuite) TestMultipleViewsReadOnly(c *check.C) {
-       bal.testMultipleViews(c, true)
+       bal.testMultipleViews(c, false, false)
+}
+
+func (bal *balancerSuite) TestMultipleViewsReadOnlyAllowTrash(c *check.C) {
+       bal.testMultipleViews(c, false, true)
 }
 
 func (bal *balancerSuite) TestMultipleViews(c *check.C) {
-       bal.testMultipleViews(c, false)
+       bal.testMultipleViews(c, true, true)
 }
 
-func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
+func (bal *balancerSuite) testMultipleViews(c *check.C, allowWrite, allowTrash bool) {
        for i, srv := range bal.srvs {
                // Add a mount to each service
                srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
                srv.mounts = append(srv.mounts, &KeepMount{
                        KeepMount: arvados.KeepMount{
-                               DeviceID:    fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
-                               UUID:        fmt.Sprintf("zzzzz-mount-%015x", i<<16),
-                               ReadOnly:    readonly,
-                               Replication: 1,
+                               DeviceID:       bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.DeviceID,
+                               UUID:           bal.srvs[(i+1)%len(bal.srvs)].mounts[0].KeepMount.UUID,
+                               AllowWrite:     allowWrite,
+                               AllowTrash:     allowTrash,
+                               Replication:    1,
+                               StorageClasses: map[string]bool{"default": true},
                        },
                        KeepService: srv,
                })
@@ -167,11 +229,12 @@ func (bal *balancerSuite) testMultipleViews(c *check.C, readonly bool) {
                                desired:     map[string]int{"default": 1},
                                current:     slots{0, i, i},
                                shouldTrash: slots{i}})
-               } else if readonly {
+               } else if !allowTrash {
                        // Timestamps are all different, and the third
                        // replica can't be trashed because it's on a
-                       // read-only mount, so the first two replicas
-                       // should be trashed.
+                       // read-only mount (with
+                       // AllowTrashWhenReadOnly=false), so the first
+                       // two replicas should be trashed.
                        bal.try(c, tester{
                                desired:     map[string]int{"default": 1},
                                current:     slots{0, i, i},
@@ -300,6 +363,35 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) {
                desired:    map[string]int{"default": 2},
                current:    slots{0, 1, 2},
                timestamps: []int64{12345678, 10000000, 10000000}})
+       bal.try(c, tester{
+               desired:     map[string]int{"default": 0},
+               current:     slots{0, 1, 2},
+               timestamps:  []int64{12345678, 12345678, 12345678},
+               shouldTrash: slots{0},
+               shouldTrashMounts: []string{
+                       bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}})
+       bal.try(c, tester{
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 2, 5, 6},
+               timestamps:  []int64{12345678, 12345679, 10000000, 10000000, 10000000},
+               shouldTrash: slots{2},
+               shouldTrashMounts: []string{
+                       bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}})
+       bal.try(c, tester{
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 2, 5, 6},
+               timestamps:  []int64{12345678, 12345679, 12345671, 10000000, 10000000},
+               shouldTrash: slots{2, 5},
+               shouldTrashMounts: []string{
+                       bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID,
+                       bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
+       bal.try(c, tester{
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 2, 5, 6},
+               timestamps:  []int64{12345678, 12345679, 12345679, 10000000, 10000000},
+               shouldTrash: slots{5},
+               shouldTrashMounts: []string{
+                       bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}})
 }
 
 func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
@@ -310,13 +402,10 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
                desired:    map[string]int{"default": 2},
                current:    slots{0, 1, 2},
                timestamps: []int64{oldTime, newTime, newTime + 1},
-               expectResult: balanceResult{
-                       have: 3,
-                       want: 2,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      1,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 1,
+               }})
        // The best replicas are too new to delete, but the excess
        // replica is old enough.
        bal.try(c, tester{
@@ -327,8 +416,9 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
 }
 
 func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
-       bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
+       bal.srvs[3].mounts[0].KeepMount.AllowWrite = false
        bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
+       bal.srvs[14].mounts[0].KeepMount.UUID = bal.srvs[3].mounts[0].KeepMount.UUID
        bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
        c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
        bal.cleanupMounts()
@@ -349,133 +439,150 @@ func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
                known:      0,
                desired:    map[string]int{"default": 2},
                current:    slots{1},
-               shouldPull: slots{0}})
+               shouldPull: slots{0},
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 1,
+               }})
        bal.try(c, tester{
                known:      0,
                desired:    map[string]int{"default": 2},
                current:    slots{0, 1},
-               shouldPull: nil})
+               shouldPull: nil,
+               expectBlockState: &balancedBlockState{
+                       needed: 2,
+               }})
        bal.try(c, tester{
                known:       0,
                desired:     map[string]int{"default": 2},
                current:     slots{0, 1, 2},
-               shouldTrash: slots{2}})
+               shouldTrash: slots{2},
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 1,
+               }})
        bal.try(c, tester{
                known:       0,
                desired:     map[string]int{"default": 3},
                current:     slots{0, 2, 3, 4},
                shouldPull:  slots{1},
                shouldTrash: slots{4},
-               expectResult: balanceResult{
-                       have: 4,
-                       want: 3,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      3,
-                               surplus:      1,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:   3,
+                       unneeded: 1,
+                       pulling:  1,
+               }})
        bal.try(c, tester{
                known:       0,
                desired:     map[string]int{"default": 3},
                current:     slots{0, 1, 2, 3, 4},
-               shouldTrash: slots{2, 3, 4}})
+               shouldTrash: slots{2, 3, 4},
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 3,
+               }})
        bal.try(c, tester{
                known:       0,
                desired:     map[string]int{"default": 4},
                current:     slots{0, 1, 2, 3, 4},
                shouldTrash: slots{3, 4},
-               expectResult: balanceResult{
-                       have: 6,
-                       want: 4,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      4,
-                               surplus:      2,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:   3,
+                       unneeded: 2,
+               }})
        // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
        bal.try(c, tester{
                known:   1,
                desired: map[string]int{"default": 2},
                current: slots{0},
-               expectResult: balanceResult{
-                       have: 2,
-                       want: 2,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      0,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed: 1,
+               }})
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 3},
                current:    slots{0},
-               shouldPull: slots{1}})
+               shouldPull: slots{1},
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 1,
+               }})
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 4},
                current:    slots{0},
-               shouldPull: slots{1, 2}})
+               shouldPull: slots{1, 2},
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 2,
+               }})
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 4},
                current:    slots{2},
-               shouldPull: slots{0, 1}})
+               shouldPull: slots{0, 1},
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 2,
+               }})
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 4},
                current:    slots{7},
                shouldPull: slots{0, 1, 2},
-               expectResult: balanceResult{
-                       have: 1,
-                       want: 4,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      4,
-                               surplus:      -3,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 3,
+               }})
        bal.try(c, tester{
                known:       1,
                desired:     map[string]int{"default": 2},
                current:     slots{1, 2, 3, 4},
                shouldPull:  slots{0},
-               shouldTrash: slots{3, 4}})
+               shouldTrash: slots{3, 4},
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 2,
+                       pulling:  1,
+               }})
        bal.try(c, tester{
                known:       1,
                desired:     map[string]int{"default": 2},
                current:     slots{0, 1, 2},
                shouldTrash: slots{1, 2},
-               expectResult: balanceResult{
-                       have: 4,
-                       want: 2,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      2,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:   1,
+                       unneeded: 2,
+               }})
 }
 
 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
-       bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
-       bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
-       bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+       dupUUID := bal.srvs[0].mounts[0].KeepMount.UUID
+       bal.srvs[9].mounts[0].KeepMount.UUID = dupUUID
+       bal.srvs[14].mounts[0].KeepMount.UUID = dupUUID
        // block 0 belongs on servers 3 and e, which have different
-       // device IDs.
+       // UUIDs.
        bal.try(c, tester{
                known:      0,
                desired:    map[string]int{"default": 2},
                current:    slots{1},
                shouldPull: slots{0}})
        // block 1 belongs on servers 0 and 9, which both report
-       // having a replica, but the replicas are on the same device
-       // ID -- so we should pull to the third position (7).
+       // having a replica, but the replicas are on the same volume
+       // -- so we should pull to the third position (7).
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 2},
                current:    slots{0, 1},
                shouldPull: slots{2}})
-       // block 1 can be pulled to the doubly-mounted device, but the
+       // block 1 can be pulled to the doubly-mounted volume, but the
        // pull should only be done on the first of the two servers.
        bal.try(c, tester{
                known:      1,
                desired:    map[string]int{"default": 2},
                current:    slots{2},
                shouldPull: slots{0}})
-       // block 0 has one replica on a single device mounted on two
+       // block 0 has one replica on a single volume mounted on two
        // servers (e,9 at positions 1,9). Trashing the replica on 9
        // would lose the block.
        bal.try(c, tester{
@@ -483,25 +590,20 @@ func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
                desired:    map[string]int{"default": 2},
                current:    slots{1, 9},
                shouldPull: slots{0},
-               expectResult: balanceResult{
-                       have: 1,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      -1,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:  1,
+                       pulling: 1,
+               }})
        // block 0 is overreplicated, but the second and third
-       // replicas are the same replica according to DeviceID
+       // replicas are the same replica according to volume UUID
        // (despite different Mtimes). Don't trash the third replica.
        bal.try(c, tester{
                known:   0,
                desired: map[string]int{"default": 2},
                current: slots{0, 1, 9},
-               expectResult: balanceResult{
-                       have: 2,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      0,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed: 2,
+               }})
        // block 0 is overreplicated; the third and fifth replicas are
        // extra, but the fourth is another view of the second and
        // shouldn't be trashed.
@@ -510,12 +612,10 @@ func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
                desired:     map[string]int{"default": 2},
                current:     slots{0, 1, 5, 9, 12},
                shouldTrash: slots{5, 12},
-               expectResult: balanceResult{
-                       have: 4,
-                       classState: map[string]balancedBlockState{"default": {
-                               desired:      2,
-                               surplus:      2,
-                               unachievable: false}}}})
+               expectBlockState: &balancedBlockState{
+                       needed:   2,
+                       unneeded: 2,
+               }})
 }
 
 func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
@@ -525,6 +625,8 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        // classes=[special,special2].
        bal.srvs[9].mounts = []*KeepMount{{
                KeepMount: arvados.KeepMount{
+                       AllowWrite:     true,
+                       AllowTrash:     true,
                        Replication:    1,
                        StorageClasses: map[string]bool{"special": true},
                        UUID:           "zzzzz-mount-special00000009",
@@ -533,6 +635,8 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
                KeepService: bal.srvs[9],
        }, {
                KeepMount: arvados.KeepMount{
+                       AllowWrite:     true,
+                       AllowTrash:     true,
                        Replication:    1,
                        StorageClasses: map[string]bool{"special": true, "special2": true},
                        UUID:           "zzzzz-mount-special20000009",
@@ -545,6 +649,8 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        // classes=[special2], one with classes=[default].
        bal.srvs[13].mounts = []*KeepMount{{
                KeepMount: arvados.KeepMount{
+                       AllowWrite:     true,
+                       AllowTrash:     true,
                        Replication:    1,
                        StorageClasses: map[string]bool{"special2": true},
                        UUID:           "zzzzz-mount-special2000000d",
@@ -553,6 +659,8 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
                KeepService: bal.srvs[13],
        }, {
                KeepMount: arvados.KeepMount{
+                       AllowWrite:     true,
+                       AllowTrash:     true,
                        Replication:    1,
                        StorageClasses: map[string]bool{"default": true},
                        UUID:           "zzzzz-mount-00000000000000d",
@@ -567,7 +675,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
                desired:          map[string]int{"default": 2, "special": 1},
                current:          slots{0, 1},
                shouldPull:       slots{9},
-               shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
+               shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
        // If some storage classes are not satisfied, don't trash any
        // excess replicas. (E.g., if someone desires repl=1 on
        // class=durable, and we have two copies on class=volatile, we
@@ -577,7 +685,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
                desired:          map[string]int{"special": 1},
                current:          slots{0, 1},
                shouldPull:       slots{9},
-               shouldPullMounts: []string{"zzzzz-mount-special00000009"}})
+               shouldPullMounts: []string{"zzzzz-mount-special20000009"}})
        // Once storage classes are satisfied, trash excess replicas
        // that appear earlier in probe order but aren't needed to
        // satisfy the desired classes.
@@ -682,14 +790,11 @@ func (bal *balancerSuite) try(c *check.C, t tester) {
                sort.Strings(didTrashMounts)
                c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts)
        }
-       if t.expectResult.have > 0 {
-               c.Check(result.have, check.Equals, t.expectResult.have)
-       }
-       if t.expectResult.want > 0 {
-               c.Check(result.want, check.Equals, t.expectResult.want)
+       if t.expectBlockState != nil {
+               c.Check(result.blockState, check.Equals, *t.expectBlockState)
        }
-       if t.expectResult.classState != nil {
-               c.Check(result.classState, check.DeepEquals, t.expectResult.classState)
+       if t.expectClassState != nil {
+               c.Check(result.classState, check.DeepEquals, t.expectClassState)
        }
 }