14714: Adds keep-balance to cluster config loading
authorEric Biagiotti <ebiagiotti@veritasgenetics.com>
Fri, 6 Sep 2019 18:39:39 +0000 (14:39 -0400)
committerEric Biagiotti <ebiagiotti@veritasgenetics.com>
Thu, 26 Sep 2019 18:18:47 +0000 (14:18 -0400)
Also adds a deprecated config loading test and fixes the service file

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

lib/config/config.default.yml
lib/config/deprecated.go
lib/config/deprecated_test.go
lib/config/export.go
lib/config/generated_config.go
lib/config/load.go
sdk/go/arvados/config.go
services/keep-balance/keep-balance.service

index 572a2558eda3c291463e56515c0e1583c2a4adc7..4338c18ed1494894ad7ef92a2e78f84aa922a230 100644 (file)
@@ -382,6 +382,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for the
+      # list of lost block hashes if there are any, one per line. Updated atomically during
+      # each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 100000
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
index 0a030fb040db9d36de290e29d67f84db76b8522c..ba2c79acf197a07f0cd520864d690e8c21bb511a 100644 (file)
@@ -474,3 +474,71 @@ func (ldr *Loader) loadOldGitHttpdConfig(cfg *arvados.Config) error {
        cfg.Clusters[cluster.ClusterID] = *cluster
        return nil
 }
+
+// defaultKeepBalanceConfigPath is the default location of the legacy
+// keep-balance config file. loadOldKeepBalanceConfig treats a missing
+// file at this path as "no legacy config" rather than as an error.
+const defaultKeepBalanceConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
+
+// oldKeepBalanceConfig holds the options read from a legacy
+// keep-balance config file. Every field is a pointer so that
+// loadOldKeepBalanceConfig can tell an option that was left out (nil)
+// from one that was given explicitly.
+type oldKeepBalanceConfig struct {
+       Client              *arvados.Client
+       Listen              *string
+       KeepServiceTypes    *[]string
+       KeepServiceList     *arvados.KeepServiceList
+       RunPeriod           *arvados.Duration
+       CollectionBatchSize *int
+       CollectionBuffers   *int
+       RequestTimeout      *arvados.Duration
+       LostBlocksFile      *string
+       ManagementToken     *string
+}
+
+// loadOldKeepBalanceConfig reads the legacy keep-balance config file
+// at ldr.KeepBalancePath and copies the options it contains into the
+// cluster returned by cfg.GetCluster(""), then stores that cluster
+// back into cfg.Clusters.
+//
+// It returns nil without touching cfg when no legacy path is set, or
+// when the path is the default one and the file does not exist. Only
+// options that are present in the legacy file override the cluster
+// config. KeepServiceTypes is ignored with a warning; KeepServiceList
+// is rejected with an error.
+func (ldr *Loader) loadOldKeepBalanceConfig(cfg *arvados.Config) error {
+       if ldr.KeepBalancePath == "" {
+               return nil
+       }
+       var oc oldKeepBalanceConfig
+       err := ldr.loadOldConfigHelper("keep-balance", ldr.KeepBalancePath, &oc)
+       if os.IsNotExist(err) && ldr.KeepBalancePath == defaultKeepBalanceConfigPath {
+               // A missing file at the default location is not an error.
+               return nil
+       }
+       if err != nil {
+               return err
+       }
+
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return err
+       }
+
+       loadOldClientConfig(cluster, oc.Client)
+
+       if oc.Listen != nil {
+               // Writing to a nil map panics, so make sure the map exists first.
+               if cluster.Services.Keepbalance.InternalURLs == nil {
+                       cluster.Services.Keepbalance.InternalURLs = map[arvados.URL]arvados.ServiceInstance{}
+               }
+               cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: *oc.Listen}] = arvados.ServiceInstance{}
+       }
+       if oc.ManagementToken != nil {
+               cluster.ManagementToken = *oc.ManagementToken
+       }
+       if oc.RunPeriod != nil {
+               cluster.Collections.BalancePeriod = *oc.RunPeriod
+       }
+       if oc.LostBlocksFile != nil {
+               cluster.Collections.BlobMissingReport = *oc.LostBlocksFile
+       }
+       if oc.CollectionBatchSize != nil {
+               cluster.Collections.BalanceCollectionBatch = *oc.CollectionBatchSize
+       }
+       if oc.CollectionBuffers != nil {
+               cluster.Collections.BalanceCollectionBuffers = *oc.CollectionBuffers
+       }
+       if oc.RequestTimeout != nil {
+               cluster.API.KeepServiceRequestTimeout = *oc.RequestTimeout
+       }
+
+       // Options that have no equivalent in the cluster config.
+       msg := "To balance specific keep services, please update to the cluster config."
+       if oc.KeepServiceTypes != nil && len(*oc.KeepServiceTypes) > 0 {
+               ldr.Logger.Warnf("The KeepServiceTypes configuration option is no longer supported and is being ignored. %s", msg)
+       }
+       if oc.KeepServiceList != nil {
+               return fmt.Errorf("The KeepServiceList configuration option is no longer supported. Please remove it from your configuration file. %s", msg)
+       }
+
+       cfg.Clusters[cluster.ClusterID] = *cluster
+       return nil
+}
index ea9b50d035483ab7fd0463669ab4dfda591bdc3f..8b80d6275be2156bf8ae61e1a31523cbcef90458 100644 (file)
@@ -216,3 +216,48 @@ func (s *LoadSuite) TestLegacyArvGitHttpdConfig(c *check.C) {
        c.Check(cluster.Git.Repositories, check.Equals, "/test/reporoot")
        c.Check(cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: ":9000"}], check.Equals, arvados.ServiceInstance{})
 }
+
+func (s *LoadSuite) TestLegacyKeepBalanceConfig(c *check.C) {
+       f := "-legacy-keepbalance-config"
+       content := []byte(fmtKeepBalanceConfig(""))
+       cluster, err := testLoadLegacyConfig(content, f, c)
+
+       c.Check(err, check.IsNil)
+       c.Check(cluster, check.NotNil)
+       c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+       c.Check(cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: ":80"}], check.Equals, arvados.ServiceInstance{})
+       c.Check(cluster.Collections.BalanceCollectionBuffers, check.Equals, 1000)
+       c.Check(cluster.Collections.BalanceCollectionBatch, check.Equals, 100000)
+       c.Check(cluster.Collections.BalancePeriod.String(), check.Equals, "10m")
+       c.Check(cluster.Collections.BlobMissingReport, check.Equals, "testfile")
+       c.Check(cluster.API.KeepServiceRequestTimeout.String(), check.Equals, "30m")
+
+       content = []byte(fmtKeepBalanceConfig(`"KeepServiceTypes":["disk"],`))
+       _, err = testLoadLegacyConfig(content, f, c)
+       c.Check(err, check.IsNil)
+
+       content = []byte(fmtKeepBalanceConfig(`"KeepServiceList":{},`))
+       _, err = testLoadLegacyConfig(content, f, c)
+       c.Check(err, check.NotNil)
+}
+
+// fmtKeepBalanceConfig returns a legacy keep-balance config document
+// with param spliced in between the "Listen" and "RunPeriod" entries.
+// param is inserted verbatim, so a non-empty value must be a complete
+// `"key": value,` fragment including the trailing comma.
+func fmtKeepBalanceConfig(param string) string {
+       return fmt.Sprintf(`
+{
+       "Client": {
+               "Scheme": "",
+               "APIHost": "example.com",
+               "AuthToken": "abcdefg",
+               "Insecure": false
+       },
+       "Listen": ":80",
+       %s
+       "RunPeriod": "10m",
+       "CollectionBatchSize": 100000,
+       "CollectionBuffers": 1000,
+       "RequestTimeout": "30m",
+       "ManagementToken": "xyzzy",
+       "LostBlocksFile": "testfile"
+}
+`, param)
+}
index 8df561c00fa0fce082ee9635f7f3fbf1ef067936..5437836f6fee05f3aded39954ea8d626d3c12f6e 100644 (file)
@@ -99,6 +99,10 @@ var whitelist = map[string]bool{
        "Collections.TrashSweepInterval":               false,
        "Collections.TrustAllContent":                  false,
        "Collections.WebDAVCache":                      false,
+       "Collections.BalanceCollectionBatch":           false,
+       "Collections.BalancePeriod":                    false,
+       "Collections.BlobMissingReport":                false,
+       "Collections.BalanceCollectionBuffers":         false,
        "Containers":                                   true,
        "Containers.CloudVMs":                          false,
        "Containers.CrunchRunCommand":                  false,
index 32c101a5a080836aa84a3f2a5d0fc7d244813dda..3806bbd8ad6680743fb4eda0b87dbf5ec86ace58 100644 (file)
@@ -388,6 +388,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for the
+      # list of lost block hashes if there are any, one per line. Updated atomically during
+      # each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 100000
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
index 93c36f69ed47d0df580f62a137bc44997db718c1..8c335f4c7c00075ebad7830fab52513926b5c1dd 100644 (file)
@@ -37,6 +37,7 @@ type Loader struct {
        WebsocketPath           string
        KeepproxyPath           string
        GitHttpdPath            string
+       KeepBalancePath         string
 
        configdata []byte
 }
@@ -69,6 +70,7 @@ func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
        flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
        flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
        flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
+       flagset.StringVar(&ldr.KeepBalancePath, "legacy-keepbalance-config", defaultKeepBalanceConfigPath, "Legacy keep-balance configuration `file`")
        flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
 }
 
@@ -149,6 +151,9 @@ func (ldr *Loader) MungeLegacyConfigArgs(lgr logrus.FieldLogger, args []string,
        if legacyConfigArg != "-legacy-git-httpd-config" {
                ldr.GitHttpdPath = ""
        }
+       if legacyConfigArg != "-legacy-keepbalance-config" {
+               ldr.KeepBalancePath = ""
+       }
 
        return munged
 }
@@ -251,6 +256,7 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                        ldr.loadOldWebsocketConfig(&cfg),
                        ldr.loadOldKeepproxyConfig(&cfg),
                        ldr.loadOldGitHttpdConfig(&cfg),
+                       ldr.loadOldKeepBalanceConfig(&cfg),
                } {
                        if err != nil {
                                return nil, err
index 076a3c44d7701c63e691d1ff54a1bae4be8e6dab..7c1c3538094869ff82a510226575a2dbbd0491ab 100644 (file)
@@ -119,6 +119,11 @@ type Cluster struct {
                TrashSweepInterval    Duration
                TrustAllContent       bool
 
+               BlobMissingReport        string
+               BalancePeriod            Duration
+               BalanceCollectionBatch   int
+               BalanceCollectionBuffers int
+
                WebDAVCache WebDAVCacheConfig
        }
        Git struct {
index 563871607874f9ad44a07315ce08bfd68274a23b..1b71fb4e44350bac913961e598494d0c01a333ab 100644 (file)
@@ -6,7 +6,6 @@
 Description=Arvados Keep Balance
 Documentation=https://doc.arvados.org/
 After=network.target
-AssertPathExists=/etc/arvados/keep-balance/keep-balance.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0