From: Tom Clegg Date: Tue, 21 Nov 2023 16:42:06 +0000 (-0500) Subject: Merge branch '20690-remove-wb1' X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/9c3df07fcefc04fcc8078a426c69215f49e63bf3?hp=e88556b084ba5af008e0cde991a4502d106e4d09 Merge branch '20690-remove-wb1' refs #20690 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/cmd/arvados-client/cmd_test.go b/cmd/arvados-client/cmd_test.go index cbbc7b1f95..911375c655 100644 --- a/cmd/arvados-client/cmd_test.go +++ b/cmd/arvados-client/cmd_test.go @@ -9,6 +9,7 @@ import ( "io/ioutil" "testing" + "git.arvados.org/arvados.git/lib/cmd" check "gopkg.in/check.v1" ) @@ -23,12 +24,12 @@ type ClientSuite struct{} func (s *ClientSuite) TestBadCommand(c *check.C) { exited := handler.RunCommand("arvados-client", []string{"no such command"}, bytes.NewReader(nil), ioutil.Discard, ioutil.Discard) - c.Check(exited, check.Equals, 2) + c.Check(exited, check.Equals, cmd.EXIT_INVALIDARGUMENT) } func (s *ClientSuite) TestBadSubcommandArgs(c *check.C) { exited := handler.RunCommand("arvados-client", []string{"get"}, bytes.NewReader(nil), ioutil.Discard, ioutil.Discard) - c.Check(exited, check.Equals, 2) + c.Check(exited, check.Equals, cmd.EXIT_INVALIDARGUMENT) } func (s *ClientSuite) TestVersion(c *check.C) { diff --git a/cmd/arvados-server/arvados-controller.service b/cmd/arvados-server/arvados-controller.service index 420cbb035a..f96532de5e 100644 --- a/cmd/arvados-server/arvados-controller.service +++ b/cmd/arvados-server/arvados-controller.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-controller LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/arvados-dispatch-cloud.service b/cmd/arvados-server/arvados-dispatch-cloud.service index 8d57e8a161..11887b8f8c 100644 --- a/cmd/arvados-server/arvados-dispatch-cloud.service +++ b/cmd/arvados-server/arvados-dispatch-cloud.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-dispatch-cloud LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/arvados-dispatch-lsf.service b/cmd/arvados-server/arvados-dispatch-lsf.service index 65d8786670..f90cd9033d 100644 --- a/cmd/arvados-server/arvados-dispatch-lsf.service +++ b/cmd/arvados-server/arvados-dispatch-lsf.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-dispatch-lsf LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/arvados-git-httpd.service b/cmd/arvados-server/arvados-git-httpd.service index b45587ffc0..6e5b0dc8e2 100644 --- a/cmd/arvados-server/arvados-git-httpd.service +++ b/cmd/arvados-server/arvados-git-httpd.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-git-httpd LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/arvados-health.service b/cmd/arvados-server/arvados-health.service index cf246b0ee2..ef145e26eb 100644 --- a/cmd/arvados-server/arvados-health.service +++ b/cmd/arvados-server/arvados-health.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/arvados-health LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/arvados-ws.service b/cmd/arvados-server/arvados-ws.service index f73db5d080..2e88449599 100644 --- a/cmd/arvados-server/arvados-ws.service +++ b/cmd/arvados-server/arvados-ws.service @@ -18,6 +18,7 @@ ExecStart=/usr/bin/arvados-ws LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/crunch-dispatch-slurm.service b/cmd/arvados-server/crunch-dispatch-slurm.service index 51b4e58c35..d2a2fb39d9 100644 --- a/cmd/arvados-server/crunch-dispatch-slurm.service +++ b/cmd/arvados-server/crunch-dispatch-slurm.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/crunch-dispatch-slurm LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/keep-balance.service b/cmd/arvados-server/keep-balance.service index 1c5808288b..f282f0a650 100644 --- a/cmd/arvados-server/keep-balance.service +++ b/cmd/arvados-server/keep-balance.service @@ -14,12 +14,13 @@ StartLimitIntervalSec=0 [Service] Type=notify EnvironmentFile=-/etc/arvados/environment -ExecStart=/usr/bin/keep-balance -commit-pulls -commit-trash +ExecStart=/usr/bin/keep-balance # Set a reasonable default for the open file limit LimitNOFILE=65536 Restart=always RestartSec=10s Nice=19 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/keep-web.service b/cmd/arvados-server/keep-web.service index c0e193d6d8..4ecd0b4978 100644 --- a/cmd/arvados-server/keep-web.service +++ b/cmd/arvados-server/keep-web.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/keep-web LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/keepproxy.service b/cmd/arvados-server/keepproxy.service index 7d4d092677..139df1c3fa 100644 --- a/cmd/arvados-server/keepproxy.service +++ b/cmd/arvados-server/keepproxy.service @@ -19,6 +19,7 @@ ExecStart=/usr/bin/keepproxy LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/cmd/arvados-server/keepstore.service b/cmd/arvados-server/keepstore.service index bcfde3a788..de0fd1dbd7 100644 --- a/cmd/arvados-server/keepstore.service +++ b/cmd/arvados-server/keepstore.service @@ -23,6 +23,7 @@ ExecStart=/usr/bin/keepstore LimitNOFILE=65536 Restart=always RestartSec=1 +RestartPreventExitStatus=2 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section StartLimitInterval=0 diff --git a/doc/Gemfile.lock b/doc/Gemfile.lock index 420e13146f..561547d6f7 100644 --- a/doc/Gemfile.lock +++ b/doc/Gemfile.lock @@ -1,20 +1,29 @@ GEM remote: https://rubygems.org/ specs: - RedCloth (4.3.2) + RedCloth (4.3.3) coderay (1.1.3) - colorize (1.0.5) + colorize (1.1.0) commonjs (0.2.7) - kramdown (1.17.0) + kramdown (2.4.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + kramdown-syntax-coderay (1.0.1) + coderay (~> 1.1) + kramdown (~> 2.0) less (2.6.0) commonjs (~> 0.2.7) liquid (4.0.4) makerakeworkwell (1.0.4) rake (>= 0.9.2, < 15) - rake (13.0.6) - zenweb (3.10.6) + rake (13.1.0) + rexml (3.2.6) + zenweb (3.11.0) coderay (~> 1.0) - kramdown (~> 1.4) + kramdown (~> 2.0) + kramdown-parser-gfm (~> 1.0) + kramdown-syntax-coderay (~> 1.0) less (~> 2.0) makerakeworkwell (~> 1.0) rake (>= 0.9, < 15) diff --git a/lib/cli/get.go b/lib/cli/get.go index 9625214e22..352e7b9af6 100644 --- a/lib/cli/get.go +++ b/lib/cli/get.go @@ -30,12 +30,12 @@ func (getCmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, st flags.SetOutput(stderr) err = flags.Parse(args) if err != nil { - return 2 + return cmd.EXIT_INVALIDARGUMENT } if len(flags.Args()) != 1 { fmt.Fprintf(stderr, "usage of %s:\n", prog) flags.PrintDefaults() - return 2 + return cmd.EXIT_INVALIDARGUMENT } if opts.Short { opts.Format = "uuid" diff --git a/lib/cmd/cmd.go b/lib/cmd/cmd.go index 2b08ab4822..40e80f5eaa 100644 --- a/lib/cmd/cmd.go +++ b/lib/cmd/cmd.go @@ -21,6 +21,8 @@ import ( "github.com/sirupsen/logrus" ) +const EXIT_INVALIDARGUMENT = 2 + type Handler interface { RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int } @@ -104,13 +106,13 @@ func (m Multi) RunCommand(prog string, args []string, stdin io.Reader, stdout, s } else if len(args) < 1 { fmt.Fprintf(stderr, "usage: %s command [args]\n", prog) m.Usage(stderr) - return 2 + return EXIT_INVALIDARGUMENT } else if cmd, ok = m[args[0]]; ok { return cmd.RunCommand(prog+" "+args[0], args[1:], stdin, stdout, stderr) } else { fmt.Fprintf(stderr, "%s: unrecognized command %q\n", prog, args[0]) m.Usage(stderr) - return 2 + return EXIT_INVALIDARGUMENT } } diff --git a/lib/cmd/parseflags.go b/lib/cmd/parseflags.go index 707cacbf52..275e063f31 100644 --- a/lib/cmd/parseflags.go +++ b/lib/cmd/parseflags.go @@ -35,7 +35,7 @@ func ParseFlags(f FlagSet, prog string, args []string, positional string, stderr case nil: if f.NArg() > 0 && positional == "" { fmt.Fprintf(stderr, "unrecognized command line arguments: %v (try -help)\n", f.Args()) - return false, 2 + return false, EXIT_INVALIDARGUMENT } return true, 0 case flag.ErrHelp: @@ -55,6 +55,6 @@ func ParseFlags(f FlagSet, prog string, args []string, positional string, stderr return false, 0 default: fmt.Fprintf(stderr, "error parsing command line arguments: %s (try -help)\n", err) - return false, 2 + return false, EXIT_INVALIDARGUMENT } } diff --git a/lib/config/cmd_test.go b/lib/config/cmd_test.go index d94eea89df..c2854895ca 100644 --- a/lib/config/cmd_test.go +++ b/lib/config/cmd_test.go @@ -33,7 +33,7 @@ func (s *CommandSuite) SetUpSuite(c *check.C) { func (s *CommandSuite) TestDump_BadArg(c *check.C) { var stderr bytes.Buffer code := DumpCommand.RunCommand("arvados config-dump", []string{"-badarg"}, bytes.NewBuffer(nil), bytes.NewBuffer(nil), &stderr) - c.Check(code, check.Equals, 2) + c.Check(code, check.Equals, cmd.EXIT_INVALIDARGUMENT) c.Check(stderr.String(), check.Equals, "error parsing command line arguments: flag provided but not defined: -badarg (try -help)\n") } diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 79889565a0..05bc1309cd 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -638,6 +638,15 @@ Clusters: # once. BalanceUpdateLimit: 100000 + # Maximum number of "pull block from other server" and "trash + # block" requests to send to each keepstore server at a + # time. Smaller values use less memory in keepstore and + # keep-balance. Larger values allow more progress per + # keep-balance iteration. A zero value computes all of the + # needed changes but does not apply any. + BalancePullLimit: 100000 + BalanceTrashLimit: 100000 + # Default lifetime for ephemeral collections: 2 weeks. This must not # be less than BlobSigningTTL. DefaultTrashLifetime: 336h diff --git a/lib/config/export.go b/lib/config/export.go index 20dd9643b0..e51e6fc32c 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -93,7 +93,9 @@ var whitelist = map[string]bool{ "Collections.BalanceCollectionBatch": false, "Collections.BalanceCollectionBuffers": false, "Collections.BalancePeriod": false, + "Collections.BalancePullLimit": false, "Collections.BalanceTimeout": false, + "Collections.BalanceTrashLimit": false, "Collections.BalanceUpdateLimit": false, "Collections.BlobDeleteConcurrency": false, "Collections.BlobMissingReport": false, diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index 7b9bd847d1..6301ed047a 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -150,6 +150,8 @@ type Cluster struct { BalanceCollectionBuffers int BalanceTimeout Duration BalanceUpdateLimit int + BalancePullLimit int + BalanceTrashLimit int WebDAVCache WebDAVCacheConfig diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go index e44dfeda87..e71eb07efa 100644 --- a/services/keep-balance/balance.go +++ b/services/keep-balance/balance.go @@ -137,7 +137,7 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a client.Timeout = 0 rs := bal.rendezvousState() - if runOptions.CommitTrash && rs != runOptions.SafeRendezvousState { + if cluster.Collections.BalanceTrashLimit > 0 && rs != runOptions.SafeRendezvousState { if runOptions.SafeRendezvousState != "" { bal.logf("notice: KeepServices list has changed since last run") } @@ -155,6 +155,7 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a if err = bal.GetCurrentState(ctx, client, cluster.Collections.BalanceCollectionBatch, cluster.Collections.BalanceCollectionBuffers); err != nil { return } + bal.setupLookupTables(cluster) bal.ComputeChangeSets() bal.PrintStatistics() if err = bal.CheckSanityLate(); err != nil { @@ -171,14 +172,14 @@ func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *a } lbFile = nil } - if runOptions.CommitPulls { + if cluster.Collections.BalancePullLimit > 0 { err = bal.CommitPulls(ctx, client) if err != nil { // Skip trash if we can't pull. (Too cautious?) return } } - if runOptions.CommitTrash { + if cluster.Collections.BalanceTrashLimit > 0 { err = bal.CommitTrash(ctx, client) if err != nil { return @@ -542,7 +543,6 @@ func (bal *Balancer) ComputeChangeSets() { // This just calls balanceBlock() once for each block, using a // pool of worker goroutines. defer bal.time("changeset_compute", "wall clock time to compute changesets")() - bal.setupLookupTables() type balanceTask struct { blkid arvados.SizedDigest @@ -577,7 +577,7 @@ func (bal *Balancer) ComputeChangeSets() { bal.collectStatistics(results) } -func (bal *Balancer) setupLookupTables() { +func (bal *Balancer) setupLookupTables(cluster *arvados.Cluster) { bal.serviceRoots = make(map[string]string) bal.classes = defaultClasses bal.mountsByClass = map[string]map[*KeepMount]bool{"default": {}} @@ -609,6 +609,13 @@ func (bal *Balancer) setupLookupTables() { // class" case in balanceBlock depends on the order classes // are considered. sort.Strings(bal.classes) + + for _, srv := range bal.KeepServices { + srv.ChangeSet = &ChangeSet{ + PullLimit: cluster.Collections.BalancePullLimit, + TrashLimit: cluster.Collections.BalanceTrashLimit, + } + } } const ( @@ -957,19 +964,21 @@ type replicationStats struct { } type balancerStats struct { - lost blocksNBytes - overrep blocksNBytes - unref blocksNBytes - garbage blocksNBytes - underrep blocksNBytes - unachievable blocksNBytes - justright blocksNBytes - desired blocksNBytes - current blocksNBytes - pulls int - trashes int - replHistogram []int - classStats map[string]replicationStats + lost blocksNBytes + overrep blocksNBytes + unref blocksNBytes + garbage blocksNBytes + underrep blocksNBytes + unachievable blocksNBytes + justright blocksNBytes + desired blocksNBytes + current blocksNBytes + pulls int + pullsDeferred int + trashes int + trashesDeferred int + replHistogram []int + classStats map[string]replicationStats // collectionBytes / collectionBlockBytes = deduplication ratio collectionBytes int64 // sum(bytes in referenced blocks) across all collections @@ -1092,7 +1101,9 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) { } for _, srv := range bal.KeepServices { s.pulls += len(srv.ChangeSet.Pulls) + s.pullsDeferred += srv.ChangeSet.PullsDeferred s.trashes += len(srv.ChangeSet.Trashes) + s.trashesDeferred += srv.ChangeSet.TrashesDeferred } bal.stats = s bal.Metrics.UpdateStats(s) diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go index 962bd40ade..b7b3fb6123 100644 --- a/services/keep-balance/balance_run_test.go +++ b/services/keep-balance/balance_run_test.go @@ -396,9 +396,7 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) { _, err := s.db.Exec(`delete from collections`) c.Assert(err, check.IsNil) opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() s.stub.serveZeroCollections() @@ -416,8 +414,6 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) { func (s *runSuite) TestRefuseBadIndex(c *check.C) { opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, ChunkPrefix: "abc", Logger: ctxlog.TestLogger(c), } @@ -439,9 +435,7 @@ func (s *runSuite) TestRefuseBadIndex(c *check.C) { func (s *runSuite) TestRefuseNonAdmin(c *check.C) { opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserNotAdmin() s.stub.serveZeroCollections() @@ -468,8 +462,6 @@ func (s *runSuite) TestInvalidChunkPrefix(c *check.C) { s.SetUpTest(c) c.Logf("trying invalid prefix %q", trial.prefix) opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, ChunkPrefix: trial.prefix, Logger: ctxlog.TestLogger(c), } @@ -489,9 +481,7 @@ func (s *runSuite) TestInvalidChunkPrefix(c *check.C) { func (s *runSuite) TestRefuseSameDeviceDifferentVolumes(c *check.C) { opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() s.stub.serveZeroCollections() @@ -519,9 +509,7 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) { s.config.Collections.BlobMissingReport = lostf.Name() defer os.Remove(lostf.Name()) opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() s.stub.serveFooBarFileCollections() @@ -540,10 +528,10 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) { } func (s *runSuite) TestDryRun(c *check.C) { + s.config.Collections.BalanceTrashLimit = 0 + s.config.Collections.BalancePullLimit = 0 opts := RunOptions{ - CommitPulls: false, - CommitTrash: false, - Logger: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() collReqs := s.stub.serveFooBarFileCollections() @@ -561,7 +549,10 @@ func (s *runSuite) TestDryRun(c *check.C) { } c.Check(trashReqs.Count(), check.Equals, 0) c.Check(pullReqs.Count(), check.Equals, 0) - c.Check(bal.stats.pulls, check.Not(check.Equals), 0) + c.Check(bal.stats.pulls, check.Equals, 0) + c.Check(bal.stats.pullsDeferred, check.Not(check.Equals), 0) + c.Check(bal.stats.trashes, check.Equals, 0) + c.Check(bal.stats.trashesDeferred, check.Not(check.Equals), 0) c.Check(bal.stats.underrep.replicas, check.Not(check.Equals), 0) c.Check(bal.stats.overrep.replicas, check.Not(check.Equals), 0) } @@ -570,10 +561,8 @@ func (s *runSuite) TestCommit(c *check.C) { s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-" s.config.ManagementToken = "xyzzy" opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), - Dumper: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), + Dumper: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() s.stub.serveFooBarFileCollections() @@ -608,8 +597,6 @@ func (s *runSuite) TestCommit(c *check.C) { func (s *runSuite) TestChunkPrefix(c *check.C) { s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-" opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, ChunkPrefix: "ac", // catch "foo" but not "bar" Logger: ctxlog.TestLogger(c), Dumper: ctxlog.TestLogger(c), @@ -639,10 +626,8 @@ func (s *runSuite) TestChunkPrefix(c *check.C) { func (s *runSuite) TestRunForever(c *check.C) { s.config.ManagementToken = "xyzzy" opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, - Logger: ctxlog.TestLogger(c), - Dumper: ctxlog.TestLogger(c), + Logger: ctxlog.TestLogger(c), + Dumper: ctxlog.TestLogger(c), } s.stub.serveCurrentUserAdmin() s.stub.serveFooBarFileCollections() diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go index e5bdf9c023..85d4ff8b5d 100644 --- a/services/keep-balance/balance_test.go +++ b/services/keep-balance/balance_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" check "gopkg.in/check.v1" @@ -26,6 +27,7 @@ var _ = check.Suite(&balancerSuite{}) type balancerSuite struct { Balancer + config *arvados.Cluster srvs []*KeepService blks map[string]tester knownRendezvous [][]int @@ -72,6 +74,11 @@ func (bal *balancerSuite) SetUpSuite(c *check.C) { bal.signatureTTL = 3600 bal.Logger = ctxlog.TestLogger(c) + + cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load() + c.Assert(err, check.Equals, nil) + bal.config, err = cfg.GetCluster("") + c.Assert(err, check.Equals, nil) } func (bal *balancerSuite) SetUpTest(c *check.C) { @@ -743,7 +750,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) { // the appropriate changes for that block have been added to the // changesets. func (bal *balancerSuite) try(c *check.C, t tester) { - bal.setupLookupTables() + bal.setupLookupTables(bal.config) blk := &BlockState{ Replicas: bal.replList(t.known, t.current), Desired: t.desired, @@ -751,9 +758,6 @@ func (bal *balancerSuite) try(c *check.C, t tester) { for i, t := range t.timestamps { blk.Replicas[i].Mtime = t } - for _, srv := range bal.srvs { - srv.ChangeSet = &ChangeSet{} - } result := bal.balanceBlock(knownBlkid(t.known), blk) var didPull, didTrash slots diff --git a/services/keep-balance/change_set.go b/services/keep-balance/change_set.go index 8e0ba028ac..c3579556bb 100644 --- a/services/keep-balance/change_set.go +++ b/services/keep-balance/change_set.go @@ -60,22 +60,35 @@ func (t Trash) MarshalJSON() ([]byte, error) { // ChangeSet is a set of change requests that will be sent to a // keepstore server. type ChangeSet struct { - Pulls []Pull - Trashes []Trash - mutex sync.Mutex + PullLimit int + TrashLimit int + + Pulls []Pull + PullsDeferred int // number that weren't added because of PullLimit + Trashes []Trash + TrashesDeferred int // number that weren't added because of TrashLimit + mutex sync.Mutex } // AddPull adds a Pull operation. func (cs *ChangeSet) AddPull(p Pull) { cs.mutex.Lock() - cs.Pulls = append(cs.Pulls, p) + if len(cs.Pulls) < cs.PullLimit { + cs.Pulls = append(cs.Pulls, p) + } else { + cs.PullsDeferred++ + } cs.mutex.Unlock() } // AddTrash adds a Trash operation func (cs *ChangeSet) AddTrash(t Trash) { cs.mutex.Lock() - cs.Trashes = append(cs.Trashes, t) + if len(cs.Trashes) < cs.TrashLimit { + cs.Trashes = append(cs.Trashes, t) + } else { + cs.TrashesDeferred++ + } cs.mutex.Unlock() } @@ -83,5 +96,5 @@ func (cs *ChangeSet) AddTrash(t Trash) { func (cs *ChangeSet) String() string { cs.mutex.Lock() defer cs.mutex.Unlock() - return fmt.Sprintf("ChangeSet{Pulls:%d, Trashes:%d}", len(cs.Pulls), len(cs.Trashes)) + return fmt.Sprintf("ChangeSet{Pulls:%d, Trashes:%d} Deferred{Pulls:%d Trashes:%d}", len(cs.Pulls), len(cs.Trashes), cs.PullsDeferred, cs.TrashesDeferred) } diff --git a/services/keep-balance/integration_test.go b/services/keep-balance/integration_test.go index 42463a002a..2e353c92be 100644 --- a/services/keep-balance/integration_test.go +++ b/services/keep-balance/integration_test.go @@ -87,8 +87,6 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) { logger := logrus.New() logger.Out = io.MultiWriter(&logBuf, os.Stderr) opts := RunOptions{ - CommitPulls: true, - CommitTrash: true, CommitConfirmedFields: true, Logger: logger, } @@ -101,7 +99,6 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) { nextOpts, err := bal.Run(context.Background(), s.client, s.config, opts) c.Check(err, check.IsNil) c.Check(nextOpts.SafeRendezvousState, check.Not(check.Equals), "") - c.Check(nextOpts.CommitPulls, check.Equals, true) if iter == 0 { c.Check(logBuf.String(), check.Matches, `(?ms).*ChangeSet{Pulls:1.*`) c.Check(logBuf.String(), check.Not(check.Matches), `(?ms).*ChangeSet{.*Trashes:[^0]}*`) diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go index 6bc9989589..ec1cb18ee1 100644 --- a/services/keep-balance/main.go +++ b/services/keep-balance/main.go @@ -32,10 +32,10 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s flags := flag.NewFlagSet(prog, flag.ContinueOnError) flags.BoolVar(&options.Once, "once", false, "balance once and then exit") - flags.BoolVar(&options.CommitPulls, "commit-pulls", false, - "send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)") - flags.BoolVar(&options.CommitTrash, "commit-trash", false, - "send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)") + deprCommitPulls := flags.Bool("commit-pulls", true, + "send pull requests (must be true -- configure Collections.BalancePullLimit = 0 to disable.)") + deprCommitTrash := flags.Bool("commit-trash", true, + "send trash requests (must be true -- configure Collections.BalanceTrashLimit = 0 to disable.)") flags.BoolVar(&options.CommitConfirmedFields, "commit-confirmed-fields", true, "update collection fields (replicas_confirmed, storage_classes_confirmed, etc.)") flags.StringVar(&options.ChunkPrefix, "chunk-prefix", "", @@ -55,6 +55,13 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s return code } + if !*deprCommitPulls || !*deprCommitTrash { + fmt.Fprint(stderr, + "Usage error: the -commit-pulls or -commit-trash command line flags are no longer supported.\n", + "Use Collections.BalancePullLimit and Collections.BalanceTrashLimit instead.\n") + return cmd.EXIT_INVALIDARGUMENT + } + // Drop our custom args that would be rejected by the generic // service.Command args = nil diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go index 9bcaec43d8..b20144e3af 100644 --- a/services/keep-balance/server.go +++ b/services/keep-balance/server.go @@ -27,8 +27,6 @@ import ( // RunOptions fields are controlled by command line flags. type RunOptions struct { Once bool - CommitPulls bool - CommitTrash bool CommitConfirmedFields bool ChunkPrefix string Logger logrus.FieldLogger @@ -112,9 +110,9 @@ func (srv *Server) runForever(ctx context.Context) error { logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod) for { - if !srv.RunOptions.CommitPulls && !srv.RunOptions.CommitTrash { + if srv.Cluster.Collections.BalancePullLimit < 1 && srv.Cluster.Collections.BalanceTrashLimit < 1 { logger.Print("WARNING: Will scan periodically, but no changes will be committed.") - logger.Print("======= Consider using -commit-pulls and -commit-trash flags.") + logger.Print("======= To commit changes, set BalancePullLimit and BalanceTrashLimit values greater than zero.") } if !dblock.KeepBalanceService.Check() {