13647: Advise deleting keep_services entries after migrating.
authorTom Clegg <tclegg@veritasgenetics.com>
Wed, 18 Sep 2019 20:28:46 +0000 (16:28 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Wed, 18 Sep 2019 20:28:46 +0000 (16:28 -0400)
Also warn about orphaned keepstore servers with no configured volumes,
and volumes configured to be accessed by nonexistent keepstore
servers.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

lib/config/deprecated_keepstore.go
lib/config/deprecated_keepstore_test.go
lib/config/load.go
sdk/go/arvados/keep_service.go

index 1e44052a66ad891a6f252133be06d2ef0b3fc3f7..1969c4c3c31f6bba0b597fe775f14025bd41cb5d 100644 (file)
@@ -16,6 +16,7 @@ import (
        "os"
        "strconv"
        "strings"
+       "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
@@ -106,7 +107,7 @@ func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
 
        var oc oldKeepstoreConfig
        err = ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, &oc)
-       if os.IsNotExist(err) && (ldr.KeepstorePath == defaultKeepstoreConfigPath) {
+       if os.IsNotExist(err) && ldr.KeepstorePath == defaultKeepstoreConfigPath {
                return nil
        } else if err != nil {
                return err
@@ -403,7 +404,7 @@ func (ldr *Loader) alreadyMigrated(oldvol oldKeepstoreVolume, newvols map[string
                        var params arvados.DirectoryVolumeDriverParameters
                        if err := json.Unmarshal(newvol.DriverParameters, &params); err == nil &&
                                oldvol.Root == params.Root {
-                               if _, ok := newvol.AccessViaHosts[myURL]; ok {
+                               if _, ok := newvol.AccessViaHosts[myURL]; ok || len(newvol.AccessViaHosts) == 0 {
                                        return uuid, true
                                }
                        }
@@ -519,14 +520,7 @@ func findKeepServicesItem(cluster *arvados.Cluster, listen string) (uuid string,
                if ks.ServiceType == "proxy" {
                        continue
                } else if keepServiceIsMe(ks, hostname, listen) {
-                       url := arvados.URL{
-                               Scheme: "http",
-                               Host:   net.JoinHostPort(ks.ServiceHost, strconv.Itoa(ks.ServicePort)),
-                       }
-                       if ks.ServiceSSLFlag {
-                               url.Scheme = "https"
-                       }
-                       return ks.UUID, url, nil
+                       return ks.UUID, keepServiceURL(ks), nil
                } else {
                        tried = append(tried, fmt.Sprintf("%s:%d", ks.ServiceHost, ks.ServicePort))
                }
@@ -535,6 +529,17 @@ func findKeepServicesItem(cluster *arvados.Cluster, listen string) (uuid string,
        return
 }
 
+func keepServiceURL(ks arvados.KeepService) arvados.URL {
+       url := arvados.URL{
+               Scheme: "http",
+               Host:   net.JoinHostPort(ks.ServiceHost, strconv.Itoa(ks.ServicePort)),
+       }
+       if ks.ServiceSSLFlag {
+               url.Scheme = "https"
+       }
+       return url
+}
+
 var localhostOrAllInterfaces = map[string]bool{
        "localhost": true,
        "127.0.0.1": true,
@@ -570,3 +575,106 @@ func keepServiceIsMe(ks arvados.KeepService, hostname string, listen string) boo
        kshost := strings.ToLower(ks.ServiceHost)
        return localhostOrAllInterfaces[kshost] || strings.HasPrefix(kshost+".", strings.ToLower(hostname)+".")
 }
+
+// Warn about pending keepstore migration tasks that haven't already
+// been warned about in loadOldKeepstoreConfig() -- i.e., unmigrated
+// keepstore hosts other than the present host, and obsolete content
+// in the keep_services table.
+func (ldr *Loader) checkPendingKeepstoreMigrations(cluster arvados.Cluster) error {
+       if cluster.Services.Controller.ExternalURL.String() == "" {
+               ldr.Logger.Debug("Services.Controller.ExternalURL not configured -- skipping check for pending keepstore config migrations")
+               return nil
+       }
+       client, err := arvados.NewClientFromConfig(&cluster)
+       if err != nil {
+               return err
+       }
+       client.AuthToken = cluster.SystemRootToken
+       var svcList arvados.KeepServiceList
+       err = client.RequestAndDecode(&svcList, "GET", "arvados/v1/keep_services", nil, nil)
+       if err != nil {
+               ldr.Logger.WithError(err).Warn("error retrieving keep_services list -- skipping check for pending keepstore config migrations")
+               return nil
+       }
+       hostname, err := os.Hostname()
+       if err != nil {
+               return fmt.Errorf("error getting hostname: %s", err)
+       }
+       sawTimes := map[time.Time]bool{}
+       for _, ks := range svcList.Items {
+               sawTimes[ks.CreatedAt] = true
+               sawTimes[ks.ModifiedAt] = true
+       }
+       if len(sawTimes) <= 1 {
+               // If all timestamps in the arvados/v1/keep_services
+               // response are identical, it's a clear sign the
+               // response was generated on the fly from the cluster
+               // config, rather than real database records. In that
+               // case (as well as the case where none are listed at
+               // all) it's pointless to look for entries that
+               // haven't yet been migrated to the config file.
+               return nil
+       }
+       needDBRows := false
+       for _, ks := range svcList.Items {
+               if ks.ServiceType == "proxy" {
+                       if len(cluster.Services.Keepproxy.InternalURLs) == 0 {
+                               needDBRows = true
+                               ldr.Logger.Warn("you should migrate your keepproxy configuration to the cluster configuration file")
+                       }
+                       continue
+               }
+               kshost := strings.ToLower(ks.ServiceHost)
+               if localhostOrAllInterfaces[kshost] || strings.HasPrefix(kshost+".", strings.ToLower(hostname)+".") {
+                       // it would be confusing to recommend
+                       // migrating *this* host's legacy keepstore
+                       // config immediately after explaining that
+                       // very migration process in more detail.
+                       continue
+               }
+               ksurl := keepServiceURL(ks)
+               if _, ok := cluster.Services.Keepstore.InternalURLs[ksurl]; ok {
+                       // already added to InternalURLs
+                       continue
+               }
+               ldr.Logger.Warnf("you should migrate the legacy keepstore configuration file on host %s", ks.ServiceHost)
+       }
+       if !needDBRows {
+               ldr.Logger.Warn("you should delete all of your manually added keep_services listings using `arv --format=uuid keep_service list | xargs -n1 arv keep_service delete --uuid` -- when those are deleted, the services listed in your cluster configuration will be used instead")
+       }
+       return nil
+}
+
+// Warn about keepstore servers that have no volumes.
+func (ldr *Loader) checkEmptyKeepstores(cluster arvados.Cluster) error {
+servers:
+       for url := range cluster.Services.Keepstore.InternalURLs {
+               for _, vol := range cluster.Volumes {
+                       if len(vol.AccessViaHosts) == 0 {
+                               // accessible by all servers
+                               return nil
+                       }
+                       if _, ok := vol.AccessViaHosts[url]; ok {
+                               continue servers
+                       }
+               }
+               ldr.Logger.Warnf("keepstore configured at %s does not have access to any volumes", url)
+       }
+       return nil
+}
+
+// Warn about AccessViaHosts entries that don't correspond to any of
+// the listed keepstore services.
+func (ldr *Loader) checkUnlistedKeepstores(cluster arvados.Cluster) error {
+       for uuid, vol := range cluster.Volumes {
+               if uuid == "SAMPLE" {
+                       continue
+               }
+               for url := range vol.AccessViaHosts {
+                       if _, ok := cluster.Services.Keepstore.InternalURLs[url]; !ok {
+                               ldr.Logger.Warnf("Volumes.%s.AccessViaHosts refers to nonexistent keepstore server %s", uuid, url)
+                       }
+               }
+       }
+       return nil
+}
index 0948e2e2f089b98376bece1f335e78ee3ada8a2c..d1028e8bd0bfe35efccd632df719a2fdaedebe73 100644 (file)
@@ -572,6 +572,35 @@ Volumes:
        c.Check(ok, check.Equals, true)
 }
 
+// Ensure logs mention unmigrated servers.
+func (s *KeepstoreMigrationSuite) TestPendingKeepstoreMigrations(c *check.C) {
+       client := arvados.NewClientFromEnv()
+       for _, host := range []string{"keep0", "keep1"} {
+               err := client.RequestAndDecode(new(struct{}), "POST", "arvados/v1/keep_services", nil, map[string]interface{}{
+                       "keep_service": map[string]interface{}{
+                               "service_type": "disk",
+                               "service_host": host + ".zzzzz.example.com",
+                               "service_port": 25107,
+                       },
+               })
+               c.Assert(err, check.IsNil)
+       }
+
+       port, _ := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+       logs := s.logsWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Bucket: foo
+`)
+       c.Check(logs, check.Matches, `(?ms).*you should remove the legacy keepstore config file.*`)
+       c.Check(logs, check.Matches, `(?ms).*you should migrate the legacy keepstore configuration file on host keep1.zzzzz.example.com.*`)
+       c.Check(logs, check.Not(check.Matches), `(?ms).*should migrate.*keep0.zzzzz.example.com.*`)
+       c.Check(logs, check.Matches, `(?ms).*keepstore configured at http://keep2.zzzzz.example.com:25107 does not have access to any volumes.*`)
+       c.Check(logs, check.Matches, `(?ms).*Volumes.zzzzz-nyw5e-possconfigerror.AccessViaHosts refers to nonexistent keepstore server http://keep00.zzzzz.example.com:25107.*`)
+}
+
 const clusterConfigForKeepstoreMigrationTest = `
 Clusters:
   zzzzz:
@@ -580,6 +609,8 @@ Clusters:
       Keepstore:
         InternalURLs:
           "http://{{.hostname}}:12345": {}
+          "http://keep0.zzzzz.example.com:25107": {}
+          "http://keep2.zzzzz.example.com:25107": {}
       Controller:
         ExternalURL: "https://{{.controller}}"
     TLS:
@@ -598,7 +629,7 @@ Clusters:
 
       zzzzz-nyw5e-readonlyonother:
         AccessViaHosts:
-          "http://other.host.example:12345": {ReadOnly: true}
+          "http://keep0.zzzzz.example.com:25107": {ReadOnly: true}
         Driver: S3
         DriverParameters:
           Endpoint: https://storage.googleapis.com
@@ -608,7 +639,7 @@ Clusters:
 
       zzzzz-nyw5e-writableonother:
         AccessViaHosts:
-          "http://other.host.example:12345": {}
+          "http://keep0.zzzzz.example.com:25107": {}
         Driver: S3
         DriverParameters:
           Endpoint: https://storage.googleapis.com
@@ -617,6 +648,8 @@ Clusters:
         Replication: 3
 
       zzzzz-nyw5e-localfilesystem:
+        AccessViaHosts:
+          "http://keep0.zzzzz.example.com:25107": {}
         Driver: Directory
         DriverParameters:
           Root: /data/sdd
@@ -629,16 +662,23 @@ Clusters:
         DriverParameters:
           Root: /data/sde
         Replication: 1
+
+      zzzzz-nyw5e-possconfigerror:
+        AccessViaHosts:
+          "http://keep00.zzzzz.example.com:25107": {}
+        Driver: Directory
+        DriverParameters:
+          Root: /data/sdf
+        Replication: 1
 `
 
 // Determine the effect of combining the given legacy keepstore config
 // YAML (just the "Volumes" entries of an old keepstore config file)
 // with the example clusterConfigForKeepstoreMigrationTest config.
 //
-// Return two Volumes configs -- one without loading
-// keepstoreconfigdata ("before") and one with ("after") -- for the
-// caller to compare.
-func (s *KeepstoreMigrationSuite) loadWithKeepstoreConfig(c *check.C, keepstoreVolumesYAML string) (before, after map[string]arvados.Volume) {
+// Return two Volumes configs -- one without loading keepstoreYAML
+// ("before") and one with ("after") -- for the caller to compare.
+func (s *KeepstoreMigrationSuite) loadWithKeepstoreConfig(c *check.C, keepstoreYAML string) (before, after map[string]arvados.Volume) {
        ldr := testLoader(c, s.clusterConfigYAML(c), nil)
        cBefore, err := ldr.Load()
        c.Assert(err, check.IsNil)
@@ -646,7 +686,7 @@ func (s *KeepstoreMigrationSuite) loadWithKeepstoreConfig(c *check.C, keepstoreV
        keepstoreconfig, err := ioutil.TempFile("", "")
        c.Assert(err, check.IsNil)
        defer os.Remove(keepstoreconfig.Name())
-       io.WriteString(keepstoreconfig, keepstoreVolumesYAML)
+       io.WriteString(keepstoreconfig, keepstoreYAML)
 
        ldr = testLoader(c, s.clusterConfigYAML(c), nil)
        ldr.KeepstorePath = keepstoreconfig.Name()
@@ -656,6 +696,24 @@ func (s *KeepstoreMigrationSuite) loadWithKeepstoreConfig(c *check.C, keepstoreV
        return cBefore.Clusters["zzzzz"].Volumes, cAfter.Clusters["zzzzz"].Volumes
 }
 
+// Return the log messages emitted when loading keepstoreYAML along
+// with clusterConfigForKeepstoreMigrationTest.
+func (s *KeepstoreMigrationSuite) logsWithKeepstoreConfig(c *check.C, keepstoreYAML string) string {
+       var logs bytes.Buffer
+
+       keepstoreconfig, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(keepstoreconfig.Name())
+       io.WriteString(keepstoreconfig, keepstoreYAML)
+
+       ldr := testLoader(c, s.clusterConfigYAML(c), &logs)
+       ldr.KeepstorePath = keepstoreconfig.Name()
+       _, err = ldr.Load()
+       c.Assert(err, check.IsNil)
+
+       return logs.String()
+}
+
 func (s *KeepstoreMigrationSuite) clusterConfigYAML(c *check.C) string {
        hostname, err := os.Hostname()
        c.Assert(err, check.IsNil)
index 7e48493939cd67a8322e67fe9f14bf357f26cd76..db4bc9f597e443d5554b6f422df35aa31d673598 100644 (file)
@@ -259,9 +259,15 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
 
        // Check for known mistakes
        for id, cc := range cfg.Clusters {
-               err = checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection)
-               if err != nil {
-                       return nil, err
+               for _, err = range []error{
+                       checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
+                       ldr.checkPendingKeepstoreMigrations(cc),
+                       ldr.checkEmptyKeepstores(cc),
+                       ldr.checkUnlistedKeepstores(cc),
+               } {
+                       if err != nil {
+                               return nil, err
+                       }
                }
        }
        return &cfg, nil
index e0ae1758d882599fa19d656d7904d0f8ed14efab..97a62fa7bb3933b89e83e428fc4da39de7453fcd 100644 (file)
@@ -10,16 +10,19 @@ import (
        "net/http"
        "strconv"
        "strings"
+       "time"
 )
 
 // KeepService is an arvados#keepService record
 type KeepService struct {
-       UUID           string `json:"uuid"`
-       ServiceHost    string `json:"service_host"`
-       ServicePort    int    `json:"service_port"`
-       ServiceSSLFlag bool   `json:"service_ssl_flag"`
-       ServiceType    string `json:"service_type"`
-       ReadOnly       bool   `json:"read_only"`
+       UUID           string    `json:"uuid"`
+       ServiceHost    string    `json:"service_host"`
+       ServicePort    int       `json:"service_port"`
+       ServiceSSLFlag bool      `json:"service_ssl_flag"`
+       ServiceType    string    `json:"service_type"`
+       ReadOnly       bool      `json:"read_only"`
+       CreatedAt      time.Time `json:"created_at"`
+       ModifiedAt     time.Time `json:"modified_at"`
 }
 
 type KeepMount struct {