X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/257d60253246952b435cea23b1912af80ea2c6d6..5012102bb1854af9ae591c755feb2ea9c874b708:/services/keep-balance/balance_test.go diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go index b93939c052..cfdd47fc91 100644 --- a/services/keep-balance/balance_test.go +++ b/services/keep-balance/balance_test.go @@ -1,3 +1,7 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + package main import ( @@ -37,11 +41,14 @@ type slots []int type tester struct { known int - desired int + desired map[string]int current slots timestamps []int64 shouldPull slots shouldTrash slots + + shouldPullMounts []string + shouldTrashMounts []string } func (bal *balancerSuite) SetUpSuite(c *check.C) { @@ -72,6 +79,12 @@ func (bal *balancerSuite) SetUpTest(c *check.C) { UUID: fmt.Sprintf("zzzzz-bi6l4-%015x", i), }, } + srv.mounts = []*KeepMount{{ + KeepMount: arvados.KeepMount{ + UUID: fmt.Sprintf("zzzzz-mount-%015x", i), + }, + KeepService: srv, + }} bal.srvs[i] = srv bal.KeepServices[srv.UUID] = srv } @@ -81,7 +94,7 @@ func (bal *balancerSuite) SetUpTest(c *check.C) { func (bal *balancerSuite) TestPerfect(c *check.C) { bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 1}, shouldPull: nil, shouldTrash: nil}) @@ -89,21 +102,21 @@ func (bal *balancerSuite) TestPerfect(c *check.C) { func (bal *balancerSuite) TestDecreaseRepl(c *check.C) { bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 2, 1}, shouldTrash: slots{2}}) } func (bal *balancerSuite) TestDecreaseReplToZero(c *check.C) { bal.try(c, tester{ - desired: 0, + desired: map[string]int{"default": 0}, current: slots{0, 1, 3}, shouldTrash: slots{0, 1, 3}}) } func (bal *balancerSuite) TestIncreaseRepl(c *check.C) { bal.try(c, tester{ - desired: 4, + desired: map[string]int{"default": 4}, current: slots{0, 1}, 
shouldPull: slots{2, 3}}) } @@ -111,45 +124,92 @@ func (bal *balancerSuite) TestIncreaseRepl(c *check.C) { func (bal *balancerSuite) TestSkipReadonly(c *check.C) { bal.srvList(0, slots{3})[0].ReadOnly = true bal.try(c, tester{ - desired: 4, + desired: map[string]int{"default": 4}, current: slots{0, 1}, shouldPull: slots{2, 4}}) } func (bal *balancerSuite) TestFixUnbalanced(c *check.C) { bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{2, 0}, shouldPull: slots{1}}) bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{2, 7}, shouldPull: slots{0, 1}}) // if only one of the pulls succeeds, we'll see this next: bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{2, 1, 7}, shouldPull: slots{0}, shouldTrash: slots{7}}) // if both pulls succeed, we'll see this next: bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{2, 0, 1, 7}, shouldTrash: slots{2, 7}}) // unbalanced + excessive replication => pull + trash bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{2, 5, 7}, shouldPull: slots{0, 1}, shouldTrash: slots{7}}) } +func (bal *balancerSuite) TestMultipleReplicasPerService(c *check.C) { + for _, srv := range bal.srvs { + for i := 0; i < 3; i++ { + m := *(srv.mounts[0]) + srv.mounts = append(srv.mounts, &m) + } + } + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 0}, + shouldPull: slots{1}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{2, 2}, + shouldPull: slots{0, 1}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 0, 1}, + shouldTrash: slots{0}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 1, 0}, + shouldTrash: slots{1}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 0, 1, 0, 2}, + shouldTrash: slots{0, 1, 2}}) 
+ bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 1, 1, 0, 2}, + shouldTrash: slots{1, 1, 2}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 1, 2}, + shouldPull: slots{0}, + shouldTrash: slots{1}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 1, 0}, + timestamps: []int64{12345678, 12345678, 12345679}, + shouldTrash: nil}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{1, 1}, + shouldPull: slots{0}}) +} + func (bal *balancerSuite) TestIncreaseReplTimestampCollision(c *check.C) { // For purposes of increasing replication, we assume identical // replicas are distinct. bal.try(c, tester{ - desired: 4, + desired: map[string]int{"default": 4}, current: slots{0, 1}, timestamps: []int64{12345678, 12345678}, shouldPull: slots{2, 3}}) @@ -159,11 +219,11 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) { // For purposes of decreasing replication, we assume identical // replicas are NOT distinct. bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{12345678, 12345678, 12345678}}) bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{12345678, 10000000, 10000000}}) } @@ -173,26 +233,140 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { newTime := bal.MinMtime + 3600 // The excess replica is too new to delete. bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{oldTime, newTime, newTime + 1}}) // The best replicas are too new to delete, but the excess // replica is old enough. 
bal.try(c, tester{ - desired: 2, + desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{newTime, newTime + 1, oldTime}, shouldTrash: slots{2}}) } +func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { + // For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in + // probe order. For these tests we give it two mounts, one + // with classes=[special], one with + // classes=[special,special2]. + bal.srvs[9].mounts = []*KeepMount{{ + KeepMount: arvados.KeepMount{ + Replication: 1, + StorageClasses: []string{"special"}, + UUID: "zzzzz-mount-special00000009", + DeviceID: "9-special", + }, + KeepService: bal.srvs[9], + }, { + KeepMount: arvados.KeepMount{ + Replication: 1, + StorageClasses: []string{"special", "special2"}, + UUID: "zzzzz-mount-special20000009", + DeviceID: "9-special-and-special2", + }, + KeepService: bal.srvs[9], + }} + // For known blocks 0/1/2/3, server 13 (d) is slot 5/3/11/1 in + // probe order. We give it two mounts, one with + // classes=[special2], one with classes=[default]. + bal.srvs[13].mounts = []*KeepMount{{ + KeepMount: arvados.KeepMount{ + Replication: 1, + StorageClasses: []string{"special2"}, + UUID: "zzzzz-mount-special2000000d", + DeviceID: "13-special2", + }, + KeepService: bal.srvs[13], + }, { + KeepMount: arvados.KeepMount{ + Replication: 1, + StorageClasses: []string{"default"}, + UUID: "zzzzz-mount-00000000000000d", + DeviceID: "13-default", + }, + KeepService: bal.srvs[13], + }} + // Pull to slot 9 because that's the only server with the + // desired class "special". + bal.try(c, tester{ + known: 0, + desired: map[string]int{"default": 2, "special": 1}, + current: slots{0, 1}, + shouldPull: slots{9}, + shouldPullMounts: []string{"zzzzz-mount-special00000009"}}) + // If some storage classes are not satisfied, don't trash any + // excess replicas. 
(E.g., if someone desires repl=1 on + // class=durable, and we have two copies on class=volatile, we + // should wait for pull to succeed before trashing anything). + bal.try(c, tester{ + known: 0, + desired: map[string]int{"special": 1}, + current: slots{0, 1}, + shouldPull: slots{9}, + shouldPullMounts: []string{"zzzzz-mount-special00000009"}}) + // Once storage classes are satisfied, trash excess replicas + // that appear earlier in probe order but aren't needed to + // satisfy the desired classes. + bal.try(c, tester{ + known: 0, + desired: map[string]int{"special": 1}, + current: slots{0, 1, 9}, + shouldTrash: slots{0, 1}}) + // Pull to slot 5, the best server with class "special2". + bal.try(c, tester{ + known: 0, + desired: map[string]int{"special2": 1}, + current: slots{0, 1}, + shouldPull: slots{5}, + shouldPullMounts: []string{"zzzzz-mount-special2000000d"}}) + // Pull to slot 5 and 9 to get replication 2 in desired class + // "special2". + bal.try(c, tester{ + known: 0, + desired: map[string]int{"special2": 2}, + current: slots{0, 1}, + shouldPull: slots{5, 9}, + shouldPullMounts: []string{"zzzzz-mount-special20000009", "zzzzz-mount-special2000000d"}}) + // Slot 0 has a replica in "default", slot 1 has a replica + // in "special"; we need another replica in "default", i.e., + // on slot 2. + bal.try(c, tester{ + known: 1, + desired: map[string]int{"default": 2, "special": 1}, + current: slots{0, 1}, + shouldPull: slots{2}}) + // Pull to best probe position 0 (despite wrong storage class) + // if it's impossible to achieve desired replication in the + // desired class (only slots 1 and 3 have special2). + bal.try(c, tester{ + known: 1, + desired: map[string]int{"special2": 3}, + current: slots{3}, + shouldPull: slots{0, 1}}) + // Trash excess replica. + bal.try(c, tester{ + known: 3, + desired: map[string]int{"special": 1}, + current: slots{0, 1}, + shouldTrash: slots{1}}) + // Leave one copy on slot 1 because slot 0 (server 9) only + // gives us repl=1. 
+ bal.try(c, tester{ + known: 3, + desired: map[string]int{"special": 2}, + current: slots{0, 1}}) +} + // Clear all servers' changesets, balance a single block, and verify // the appropriate changes for that block have been added to the // changesets. func (bal *balancerSuite) try(c *check.C, t tester) { - bal.setupServiceRoots() + bal.setupLookupTables() blk := &BlockState{ + Replicas: bal.replList(t.known, t.current), Desired: t.desired, - Replicas: bal.replList(t.known, t.current)} + } for i, t := range t.timestamps { blk.Replicas[i].Mtime = t } @@ -202,6 +376,7 @@ func (bal *balancerSuite) try(c *check.C, t tester) { bal.balanceBlock(knownBlkid(t.known), blk) var didPull, didTrash slots + var didPullMounts, didTrashMounts []string for i, srv := range bal.srvs { var slot int for probeOrder, srvNum := range bal.knownRendezvous[t.known] { @@ -211,10 +386,12 @@ func (bal *balancerSuite) try(c *check.C, t tester) { } for _, pull := range srv.Pulls { didPull = append(didPull, slot) + didPullMounts = append(didPullMounts, pull.To.UUID) c.Check(pull.SizedDigest, check.Equals, knownBlkid(t.known)) } for _, trash := range srv.Trashes { didTrash = append(didTrash, slot) + didTrashMounts = append(didTrashMounts, trash.From.UUID) c.Check(trash.SizedDigest, check.Equals, knownBlkid(t.known)) } } @@ -224,11 +401,19 @@ func (bal *balancerSuite) try(c *check.C, t tester) { } c.Check(didPull, check.DeepEquals, t.shouldPull) c.Check(didTrash, check.DeepEquals, t.shouldTrash) + if t.shouldPullMounts != nil { + sort.Strings(didPullMounts) + c.Check(didPullMounts, check.DeepEquals, t.shouldPullMounts) + } + if t.shouldTrashMounts != nil { + sort.Strings(didTrashMounts) + c.Check(didTrashMounts, check.DeepEquals, t.shouldTrashMounts) + } } // srvList returns the KeepServices, sorted in rendezvous order and -// then selected by idx. For example, srvList(3, 0, 1, 4) returns the -// the first-, second-, and fifth-best servers for storing +// then selected by idx. 
For example, srvList(3, slots{0, 1, 4}) +// returns the first-, second-, and fifth-best servers for storing // bal.knownBlkid(3). func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepService) { for _, i := range order { @@ -240,9 +425,14 @@ func (bal *balancerSuite) srvList(knownBlockID int, order slots) (srvs []*KeepSe // replList is like srvList but returns an "existing replicas" slice, // suitable for a BlockState test fixture. func (bal *balancerSuite) replList(knownBlockID int, order slots) (repls []Replica) { + nextMnt := map[*KeepService]int{} mtime := time.Now().UnixNano() - (bal.signatureTTL+86400)*1e9 for _, srv := range bal.srvList(knownBlockID, order) { - repls = append(repls, Replica{srv, mtime}) + // round-robin repls onto each srv's mounts + n := nextMnt[srv] + nextMnt[srv] = (n + 1) % len(srv.mounts) + + repls = append(repls, Replica{srv.mounts[n], mtime}) mtime++ } return