Merge branch '17800-arvput-no-follow-links'
authorLucas Di Pentima <lucas.dipentima@curii.com>
Fri, 18 Jun 2021 11:03:15 +0000 (08:03 -0300)
committerLucas Di Pentima <lucas.dipentima@curii.com>
Fri, 18 Jun 2021 11:03:15 +0000 (08:03 -0300)
Closes #17800

Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

lib/config/config.default.yml
lib/config/generated_config.go
lib/config/load.go
lib/config/load_test.go
services/keep-web/s3.go
services/keep-web/s3_test.go
tools/salt-install/provision.sh

index e2ef9899e578d8f50d3ca720b334d046c53f5cf9..f0794a7e5320b115f1cac6b2d0f34c87cb3d7394 100644 (file)
@@ -24,49 +24,42 @@ Clusters:
 
       # In each of the service sections below, the keys under
       # InternalURLs are the endpoints where the service should be
-      # listening, and reachable from other hosts in the cluster.
-      SAMPLE:
-        InternalURLs:
-          "http://host1.example:12345": {}
-          "http://host2.example:12345":
-            # Rendezvous is normally empty/omitted. When changing the
-            # URL of a Keepstore service, Rendezvous should be set to
-            # the old URL (with trailing slash omitted) to preserve
-            # rendezvous ordering.
-            Rendezvous: ""
-          SAMPLE:
-            Rendezvous: ""
-        ExternalURL: "-"
+      # listening, and reachable from other hosts in the
+      # cluster. Example:
+      #
+      # InternalURLs:
+      #   "http://host1.example:12345": {}
+      #   "http://host2.example:12345": {}
 
       RailsAPI:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       Controller:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Websocket:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepbalance:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       GitHTTP:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       GitSSH:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       DispatchCloud:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       SSO:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepproxy:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebDAV:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for Workbench inline preview.  If blank, use
         # WebDAVDownload instead, and disable inline preview.
         # If both are empty, downloading collections from workbench
@@ -105,7 +98,7 @@ Clusters:
         ExternalURL: ""
 
       WebDAVDownload:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for download links. If blank, serve links to WebDAV
         # with disposition=attachment query param.  Unlike preview links,
         # browsers do not render attachments, so there is no risk of XSS.
@@ -119,13 +112,19 @@ Clusters:
         ExternalURL: ""
 
       Keepstore:
-        InternalURLs: {}
+        InternalURLs:
+          SAMPLE:
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
         ExternalURL: "-"
       Composer:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebShell:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # ShellInABox service endpoint URL for a given VM.  If empty, do not
         # offer web shell logins.
         #
@@ -136,13 +135,13 @@ Clusters:
         # https://*.webshell.uuid_prefix.arvadosapi.com
         ExternalURL: ""
       Workbench1:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Workbench2:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Health:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
 
     PostgreSQL:
index fbee937b39251ff41b72d89325fffee8ff44e7bb..d5e0f200b84559845450c65c36da1092b84ef5d8 100644 (file)
@@ -30,49 +30,42 @@ Clusters:
 
       # In each of the service sections below, the keys under
       # InternalURLs are the endpoints where the service should be
-      # listening, and reachable from other hosts in the cluster.
-      SAMPLE:
-        InternalURLs:
-          "http://host1.example:12345": {}
-          "http://host2.example:12345":
-            # Rendezvous is normally empty/omitted. When changing the
-            # URL of a Keepstore service, Rendezvous should be set to
-            # the old URL (with trailing slash omitted) to preserve
-            # rendezvous ordering.
-            Rendezvous: ""
-          SAMPLE:
-            Rendezvous: ""
-        ExternalURL: "-"
+      # listening, and reachable from other hosts in the
+      # cluster. Example:
+      #
+      # InternalURLs:
+      #   "http://host1.example:12345": {}
+      #   "http://host2.example:12345": {}
 
       RailsAPI:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       Controller:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Websocket:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepbalance:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       GitHTTP:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       GitSSH:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       DispatchCloud:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       SSO:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepproxy:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebDAV:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for Workbench inline preview.  If blank, use
         # WebDAVDownload instead, and disable inline preview.
         # If both are empty, downloading collections from workbench
@@ -111,7 +104,7 @@ Clusters:
         ExternalURL: ""
 
       WebDAVDownload:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for download links. If blank, serve links to WebDAV
         # with disposition=attachment query param.  Unlike preview links,
         # browsers do not render attachments, so there is no risk of XSS.
@@ -125,13 +118,19 @@ Clusters:
         ExternalURL: ""
 
       Keepstore:
-        InternalURLs: {}
+        InternalURLs:
+          SAMPLE:
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
         ExternalURL: "-"
       Composer:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebShell:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # ShellInABox service endpoint URL for a given VM.  If empty, do not
         # offer web shell logins.
         #
@@ -142,13 +141,13 @@ Clusters:
         # https://*.webshell.uuid_prefix.arvadosapi.com
         ExternalURL: ""
       Workbench1:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Workbench2:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Health:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
 
     PostgreSQL:
index cc26cdaecc073bf747d308d7acb0a53388f3f4a6..169b252a0e8fbbe44b0e6722e7fe1fe745ca01b6 100644 (file)
@@ -182,6 +182,11 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                ldr.configdata = buf
        }
 
+       // FIXME: We should reject YAML if the same key is used twice
+       // in a map/object, like {foo: bar, foo: baz}. Maybe we'll get
+       // this fixed free when we upgrade ghodss/yaml to a version
+       // that uses go-yaml v3.
+
        // Load the config into a dummy map to get the cluster ID
        // keys, discarding the values; then set up defaults for each
        // cluster ID; then load the real config on top of the
@@ -291,6 +296,8 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                        checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
                        ldr.checkEmptyKeepstores(cc),
                        ldr.checkUnlistedKeepstores(cc),
+                       // TODO: check non-empty Rendezvous on
+                       // services other than Keepstore
                } {
                        if err != nil {
                                return nil, err
@@ -354,20 +361,33 @@ func (ldr *Loader) logExtraKeys(expected, supplied map[string]interface{}, prefi
        if ldr.Logger == nil {
                return
        }
-       allowed := map[string]interface{}{}
-       for k, v := range expected {
-               allowed[strings.ToLower(k)] = v
-       }
        for k, vsupp := range supplied {
                if k == "SAMPLE" {
                        // entry will be dropped in removeSampleKeys anyway
                        continue
                }
-               vexp, ok := allowed[strings.ToLower(k)]
+               vexp, ok := expected[k]
                if expected["SAMPLE"] != nil {
+                       // use the SAMPLE entry's keys as the
+                       // "expected" map when checking vsupp
+                       // recursively.
                        vexp = expected["SAMPLE"]
                } else if !ok {
-                       ldr.Logger.Warnf("deprecated or unknown config entry: %s%s", prefix, k)
+                       // check for a case-insensitive match
+                       hint := ""
+                       for ek := range expected {
+                               if strings.EqualFold(k, ek) {
+                                       hint = " (perhaps you meant " + ek + "?)"
+                                       // If we don't delete this, it
+                                       // will end up getting merged,
+                                       // unpredictably
+                                       // merging/overriding the
+                                       // default.
+                                       delete(supplied, k)
+                                       break
+                               }
+                       }
+                       ldr.Logger.Warnf("deprecated or unknown config entry: %s%s%s", prefix, k, hint)
                        continue
                }
                if vsupp, ok := vsupp.(map[string]interface{}); !ok {
index 6c11ee7803116ab2df30f2050f23f7ce6580a9fc..396faca48461b30d7d5708a55f05bafcf73159ac 100644 (file)
@@ -196,21 +196,37 @@ Clusters:
     SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
     Collections:
      BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-    postgresql: {}
-    BadKey: {}
-    Containers: {}
+    PostgreSQL: {}
+    BadKey1: {}
+    Containers:
+      RunTimeEngine: abc
     RemoteClusters:
       z2222:
         Host: z2222.arvadosapi.com
         Proxy: true
-        BadKey: badValue
+        BadKey2: badValue
+    Services:
+      KeepStore:
+        InternalURLs:
+          "http://host.example:12345": {}
+      Keepstore:
+        InternalURLs:
+          "http://host.example:12345":
+            RendezVous: x
+    ServiceS:
+      Keepstore:
+        InternalURLs:
+          "http://host.example:12345": {}
+    Volumes:
+      zzzzz-nyw5e-aaaaaaaaaaaaaaa: {}
 `, &logbuf).Load()
        c.Assert(err, check.IsNil)
+       c.Log(logbuf.String())
        logs := strings.Split(strings.TrimSuffix(logbuf.String(), "\n"), "\n")
        for _, log := range logs {
-               c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*BadKey.*`)
+               c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*(RunTimeEngine.*RuntimeEngine|BadKey1|BadKey2|KeepStore|ServiceS|RendezVous).*`)
        }
-       c.Check(logs, check.HasLen, 2)
+       c.Check(logs, check.HasLen, 6)
 }
 
 func (s *LoadSuite) checkSAMPLEKeys(c *check.C, path string, x interface{}) {
@@ -322,8 +338,8 @@ func (s *LoadSuite) TestPostgreSQLKeyConflict(c *check.C) {
        _, err := testLoader(c, `
 Clusters:
  zzzzz:
-  postgresql:
-   connection:
+  PostgreSQL:
+   Connection:
      DBName: dbname
      Host: host
 `, nil).Load()
index f03ff01b8127d6ee1d1056ee72bee5bd6507d26b..6ea9bf9f7a8383cc10ed82e108b76bb3ca97585b 100644 (file)
@@ -136,15 +136,39 @@ func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string,
                }
        }
 
-       normalizedURL := *r.URL
-       normalizedURL.RawPath = ""
-       normalizedURL.Path = reMultipleSlashChars.ReplaceAllString(normalizedURL.Path, "/")
-       ctxlog.FromContext(r.Context()).Infof("escapedPath %s", normalizedURL.EscapedPath())
-       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedURL.EscapedPath(), s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
+       normalizedPath := normalizePath(r.URL.Path)
+       ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
+       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
        ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
        return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
 }
 
+func normalizePath(s string) string {
+       // (url.URL).EscapedPath() would be incorrect here. AWS
+       // documentation specifies the URL path should be normalized
+       // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
+       // / "." / "_" / "~". The implication is that everything other
+       // than those chars (and "/") _must_ be percent-encoded --
+       // even chars like ";" and "," that are not normally
+       // percent-encoded in paths.
+       out := ""
+       for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) {
+               if (c >= 'a' && c <= 'z') ||
+                       (c >= 'A' && c <= 'Z') ||
+                       (c >= '0' && c <= '9') ||
+                       c == '-' ||
+                       c == '.' ||
+                       c == '_' ||
+                       c == '~' ||
+                       c == '/' {
+                       out += string(c)
+               } else {
+                       out += fmt.Sprintf("%%%02X", c)
+               }
+       }
+       return out
+}
+
 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
        // scope is {datestamp}/{region}/{service}/aws4_request
        drs := strings.Split(scope, "/")
index 4f70168b5629ecfbdb06193d75344cb4e27673eb..f411fde871cdba0cc9bbb2584be9a9bb878c6c1e 100644 (file)
@@ -558,12 +558,15 @@ func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) {
                rawPath        string
                normalizedPath string
        }{
-               {"/foo", "/foo"},             // boring case
-               {"/foo%5fbar", "/foo_bar"},   // _ must not be escaped
-               {"/foo%2fbar", "/foo/bar"},   // / must not be escaped
-               {"/(foo)", "/%28foo%29"},     // () must be escaped
-               {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase
+               {"/foo", "/foo"},                           // boring case
+               {"/foo%5fbar", "/foo_bar"},                 // _ must not be escaped
+               {"/foo%2fbar", "/foo/bar"},                 // / must not be escaped
+               {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
+               {"/foo%5bbar", "/foo%5Bbar"},               // %XX must be uppercase
+               {"//foo///.bar", "/foo/.bar"},              // "//" and "///" must be squashed to "/"
        } {
+               c.Logf("trial %q", trial)
+
                date := time.Now().UTC().Format("20060102T150405Z")
                scope := "20200202/zzzzz/S3/aws4_request"
                canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
@@ -1098,6 +1101,15 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) {
        buf, err := cmd.CombinedOutput()
        c.Check(err, check.IsNil)
        c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
+
+       // This tests whether s3cmd's path normalization agrees with
+       // keep-web's signature verification wrt chars like "|"
+       // (neither reserved nor unreserved) and "," (not normally
+       // percent-encoded in a path).
+       cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar")
+       buf, err = cmd.CombinedOutput()
+       c.Check(err, check.NotNil)
+       c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`)
 }
 
 func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
index c1af511ad464f5a447a4552e8181dcfff23ffefc..7faf5c2fb554eec71400107ca80f6087970e0550 100755 (executable)
@@ -135,7 +135,7 @@ VERSION="latest"
 
 # The arvados-formula version.  For a stable release, this should be a
 # branch name (e.g. X.Y-dev) or tag for the release.
-ARVADOS_TAG="master"
+ARVADOS_TAG="main"
 
 # Other formula versions we depend on
 POSTGRES_TAG="v0.41.6"