Merge branch '17609-diagnostics-cmd'
authorTom Clegg <tom@curii.com>
Fri, 18 Jun 2021 14:58:36 +0000 (10:58 -0400)
committerTom Clegg <tom@curii.com>
Fri, 18 Jun 2021 14:58:36 +0000 (10:58 -0400)
closes #17609

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

14 files changed:
lib/config/config.default.yml
lib/config/generated_config.go
lib/config/load.go
lib/config/load_test.go
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_site_test.go
sdk/python/arvados/commands/put.py
sdk/python/tests/test_arv_put.py
services/keep-web/handler_test.go
services/keep-web/s3.go
services/keep-web/s3_test.go
tools/salt-install/provision.sh

index e2ef9899e578d8f50d3ca720b334d046c53f5cf9..f0794a7e5320b115f1cac6b2d0f34c87cb3d7394 100644 (file)
@@ -24,49 +24,42 @@ Clusters:
 
       # In each of the service sections below, the keys under
       # InternalURLs are the endpoints where the service should be
-      # listening, and reachable from other hosts in the cluster.
-      SAMPLE:
-        InternalURLs:
-          "http://host1.example:12345": {}
-          "http://host2.example:12345":
-            # Rendezvous is normally empty/omitted. When changing the
-            # URL of a Keepstore service, Rendezvous should be set to
-            # the old URL (with trailing slash omitted) to preserve
-            # rendezvous ordering.
-            Rendezvous: ""
-          SAMPLE:
-            Rendezvous: ""
-        ExternalURL: "-"
+      # listening, and reachable from other hosts in the
+      # cluster. Example:
+      #
+      # InternalURLs:
+      #   "http://host1.example:12345": {}
+      #   "http://host2.example:12345": {}
 
       RailsAPI:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       Controller:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Websocket:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepbalance:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       GitHTTP:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       GitSSH:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       DispatchCloud:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       SSO:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepproxy:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebDAV:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for Workbench inline preview.  If blank, use
         # WebDAVDownload instead, and disable inline preview.
         # If both are empty, downloading collections from workbench
@@ -105,7 +98,7 @@ Clusters:
         ExternalURL: ""
 
       WebDAVDownload:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for download links. If blank, serve links to WebDAV
         # with disposition=attachment query param.  Unlike preview links,
         # browsers do not render attachments, so there is no risk of XSS.
@@ -119,13 +112,19 @@ Clusters:
         ExternalURL: ""
 
       Keepstore:
-        InternalURLs: {}
+        InternalURLs:
+          SAMPLE:
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
         ExternalURL: "-"
       Composer:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebShell:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # ShellInABox service endpoint URL for a given VM.  If empty, do not
         # offer web shell logins.
         #
@@ -136,13 +135,13 @@ Clusters:
         # https://*.webshell.uuid_prefix.arvadosapi.com
         ExternalURL: ""
       Workbench1:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Workbench2:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Health:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
 
     PostgreSQL:
index fbee937b39251ff41b72d89325fffee8ff44e7bb..d5e0f200b84559845450c65c36da1092b84ef5d8 100644 (file)
@@ -30,49 +30,42 @@ Clusters:
 
       # In each of the service sections below, the keys under
       # InternalURLs are the endpoints where the service should be
-      # listening, and reachable from other hosts in the cluster.
-      SAMPLE:
-        InternalURLs:
-          "http://host1.example:12345": {}
-          "http://host2.example:12345":
-            # Rendezvous is normally empty/omitted. When changing the
-            # URL of a Keepstore service, Rendezvous should be set to
-            # the old URL (with trailing slash omitted) to preserve
-            # rendezvous ordering.
-            Rendezvous: ""
-          SAMPLE:
-            Rendezvous: ""
-        ExternalURL: "-"
+      # listening, and reachable from other hosts in the
+      # cluster. Example:
+      #
+      # InternalURLs:
+      #   "http://host1.example:12345": {}
+      #   "http://host2.example:12345": {}
 
       RailsAPI:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       Controller:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Websocket:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepbalance:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       GitHTTP:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       GitSSH:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       DispatchCloud:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
       SSO:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Keepproxy:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebDAV:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for Workbench inline preview.  If blank, use
         # WebDAVDownload instead, and disable inline preview.
         # If both are empty, downloading collections from workbench
@@ -111,7 +104,7 @@ Clusters:
         ExternalURL: ""
 
       WebDAVDownload:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # Base URL for download links. If blank, serve links to WebDAV
         # with disposition=attachment query param.  Unlike preview links,
         # browsers do not render attachments, so there is no risk of XSS.
@@ -125,13 +118,19 @@ Clusters:
         ExternalURL: ""
 
       Keepstore:
-        InternalURLs: {}
+        InternalURLs:
+          SAMPLE:
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
         ExternalURL: "-"
       Composer:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       WebShell:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         # ShellInABox service endpoint URL for a given VM.  If empty, do not
         # offer web shell logins.
         #
@@ -142,13 +141,13 @@ Clusters:
         # https://*.webshell.uuid_prefix.arvadosapi.com
         ExternalURL: ""
       Workbench1:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Workbench2:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: ""
       Health:
-        InternalURLs: {}
+        InternalURLs: {SAMPLE: {}}
         ExternalURL: "-"
 
     PostgreSQL:
index cc26cdaecc073bf747d308d7acb0a53388f3f4a6..169b252a0e8fbbe44b0e6722e7fe1fe745ca01b6 100644 (file)
@@ -182,6 +182,11 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                ldr.configdata = buf
        }
 
+       // FIXME: We should reject YAML if the same key is used twice
+       // in a map/object, like {foo: bar, foo: baz}. Maybe we'll get
+       // this fixed free when we upgrade ghodss/yaml to a version
+       // that uses go-yaml v3.
+
        // Load the config into a dummy map to get the cluster ID
        // keys, discarding the values; then set up defaults for each
        // cluster ID; then load the real config on top of the
@@ -291,6 +296,8 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                        checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
                        ldr.checkEmptyKeepstores(cc),
                        ldr.checkUnlistedKeepstores(cc),
+                       // TODO: check non-empty Rendezvous on
+                       // services other than Keepstore
                } {
                        if err != nil {
                                return nil, err
@@ -354,20 +361,33 @@ func (ldr *Loader) logExtraKeys(expected, supplied map[string]interface{}, prefi
        if ldr.Logger == nil {
                return
        }
-       allowed := map[string]interface{}{}
-       for k, v := range expected {
-               allowed[strings.ToLower(k)] = v
-       }
        for k, vsupp := range supplied {
                if k == "SAMPLE" {
                        // entry will be dropped in removeSampleKeys anyway
                        continue
                }
-               vexp, ok := allowed[strings.ToLower(k)]
+               vexp, ok := expected[k]
                if expected["SAMPLE"] != nil {
+                       // use the SAMPLE entry's keys as the
+                       // "expected" map when checking vsupp
+                       // recursively.
                        vexp = expected["SAMPLE"]
                } else if !ok {
-                       ldr.Logger.Warnf("deprecated or unknown config entry: %s%s", prefix, k)
+                       // check for a case-insensitive match
+                       hint := ""
+                       for ek := range expected {
+                               if strings.EqualFold(k, ek) {
+                                       hint = " (perhaps you meant " + ek + "?)"
+                                       // If we don't delete this, it
+                                       // will end up getting merged,
+                                       // unpredictably
+                                       // merging/overriding the
+                                       // default.
+                                       delete(supplied, k)
+                                       break
+                               }
+                       }
+                       ldr.Logger.Warnf("deprecated or unknown config entry: %s%s%s", prefix, k, hint)
                        continue
                }
                if vsupp, ok := vsupp.(map[string]interface{}); !ok {
index 6c11ee7803116ab2df30f2050f23f7ce6580a9fc..396faca48461b30d7d5708a55f05bafcf73159ac 100644 (file)
@@ -196,21 +196,37 @@ Clusters:
     SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
     Collections:
      BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-    postgresql: {}
-    BadKey: {}
-    Containers: {}
+    PostgreSQL: {}
+    BadKey1: {}
+    Containers:
+      RunTimeEngine: abc
     RemoteClusters:
       z2222:
         Host: z2222.arvadosapi.com
         Proxy: true
-        BadKey: badValue
+        BadKey2: badValue
+    Services:
+      KeepStore:
+        InternalURLs:
+          "http://host.example:12345": {}
+      Keepstore:
+        InternalURLs:
+          "http://host.example:12345":
+            RendezVous: x
+    ServiceS:
+      Keepstore:
+        InternalURLs:
+          "http://host.example:12345": {}
+    Volumes:
+      zzzzz-nyw5e-aaaaaaaaaaaaaaa: {}
 `, &logbuf).Load()
        c.Assert(err, check.IsNil)
+       c.Log(logbuf.String())
        logs := strings.Split(strings.TrimSuffix(logbuf.String(), "\n"), "\n")
        for _, log := range logs {
-               c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*BadKey.*`)
+               c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*(RunTimeEngine.*RuntimeEngine|BadKey1|BadKey2|KeepStore|ServiceS|RendezVous).*`)
        }
-       c.Check(logs, check.HasLen, 2)
+       c.Check(logs, check.HasLen, 6)
 }
 
 func (s *LoadSuite) checkSAMPLEKeys(c *check.C, path string, x interface{}) {
@@ -322,8 +338,8 @@ func (s *LoadSuite) TestPostgreSQLKeyConflict(c *check.C) {
        _, err := testLoader(c, `
 Clusters:
  zzzzz:
-  postgresql:
-   connection:
+  PostgreSQL:
+   Connection:
      DBName: dbname
      Host: host
 `, nil).Load()
index 2478641df5478639c92ff2b4d0f57d847165eee7..65c207162b0f8590f463097faa3b7de25043f5fc 100644 (file)
@@ -29,12 +29,29 @@ var (
        ErrIsDirectory       = errors.New("cannot rename file to overwrite existing directory")
        ErrNotADirectory     = errors.New("not a directory")
        ErrPermission        = os.ErrPermission
+       DebugLocksPanicMode  = false
 )
 
 type syncer interface {
        Sync() error
 }
 
+func debugPanicIfNotLocked(l sync.Locker) {
+       if !DebugLocksPanicMode {
+               return
+       }
+       race := false
+       go func() {
+               l.Lock()
+               race = true
+               l.Unlock()
+       }()
+       time.Sleep(10)
+       if race {
+               panic("bug: caller-must-have-lock func called, but nobody has lock")
+       }
+}
+
 // A File is an *os.File-like interface for reading and writing files
 // in a FileSystem.
 type File interface {
@@ -271,6 +288,7 @@ func (n *treenode) IsDir() bool {
 }
 
 func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) {
+       debugPanicIfNotLocked(n)
        child = n.inodes[name]
        if name == "" || name == "." || name == ".." {
                err = ErrInvalidArgument
@@ -333,6 +351,7 @@ func (n *treenode) Sync() error {
 func (n *treenode) MemorySize() (size int64) {
        n.RLock()
        defer n.RUnlock()
+       debugPanicIfNotLocked(n)
        for _, inode := range n.inodes {
                size += inode.MemorySize()
        }
index 0233826a7281e9aa95f5dbb9f74e93ddb1bfd473..22e2b31d57e08d6c5dc813017c62b950f61aac01 100644 (file)
@@ -1167,9 +1167,12 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
                        node = node.Parent()
                        continue
                }
+               modtime := node.Parent().FileInfo().ModTime()
+               node.Lock()
+               locked := node
                node, err = node.Child(name, func(child inode) (inode, error) {
                        if child == nil {
-                               child, err := node.FS().newNode(name, 0755|os.ModeDir, node.Parent().FileInfo().ModTime())
+                               child, err := node.FS().newNode(name, 0755|os.ModeDir, modtime)
                                if err != nil {
                                        return nil, err
                                }
@@ -1181,6 +1184,7 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
                                return child, nil
                        }
                })
+               locked.Unlock()
                if err != nil {
                        return
                }
@@ -1191,10 +1195,13 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
                err = fmt.Errorf("invalid file part %q in path %q", basename, path)
                return
        }
+       modtime := node.FileInfo().ModTime()
+       node.Lock()
+       defer node.Unlock()
        _, err = node.Child(basename, func(child inode) (inode, error) {
                switch child := child.(type) {
                case nil:
-                       child, err = node.FS().newNode(basename, 0755, node.FileInfo().ModTime())
+                       child, err = node.FS().newNode(basename, 0755, modtime)
                        if err != nil {
                                return nil, err
                        }
index 900893aa36420e7c9d2008fff31b36d4bd03e0bf..5225df59ee58ec4c2017054f59ca68ee7936e41e 100644 (file)
@@ -54,6 +54,8 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem {
 }
 
 func (fs *customFileSystem) MountByID(mount string) {
+       fs.root.treenode.Lock()
+       defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &vdirnode{
                        treenode: treenode{
@@ -72,12 +74,16 @@ func (fs *customFileSystem) MountByID(mount string) {
 }
 
 func (fs *customFileSystem) MountProject(mount, uuid string) {
+       fs.root.treenode.Lock()
+       defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return fs.newProjectNode(fs.root, mount, uuid), nil
        })
 }
 
 func (fs *customFileSystem) MountUsers(mount string) {
+       fs.root.treenode.Lock()
+       defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &lookupnode{
                        stale:   fs.Stale,
index b1c627f89c9e43c198dbd7a2a59f059c3cb01372..a41a9f4da992f34febd1312d03d25b3995344658 100644 (file)
@@ -32,6 +32,15 @@ const (
 
 var _ = check.Suite(&SiteFSSuite{})
 
+func init() {
+       // Enable DebugLocksPanicMode sometimes. Don't enable it all
+       // the time, though -- it adds many calls to time.Sleep(),
+       // which could hide different bugs.
+       if time.Now().Seconds()&1 == 0 {
+               DebugLocksPanicMode = true
+       }
+}
+
 type SiteFSSuite struct {
        client *Client
        fs     CustomFileSystem
index 0920369646c6b7556babdef38fced98fee2e18b8..4f0ac1fffaf9c4ee6875c0092ba1090d6e3eb9a6 100644 (file)
@@ -173,7 +173,8 @@ Follow file and directory symlinks (default).
 """)
 _group.add_argument('--no-follow-links', action='store_false', dest='follow_links',
                     help="""
-Do not follow file and directory symlinks.
+Ignore file and directory symlinks. Even paths given explicitly on the
+command line will be skipped if they are symlinks.
 """)
 
 
@@ -259,9 +260,8 @@ def parse_arguments(arguments):
 
     args.paths = ["-" if x == "/dev/stdin" else x for x in args.paths]
 
-    if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
-        if args.filename:
-            arg_parser.error("""
+    if args.filename and (len(args.paths) != 1 or os.path.isdir(args.paths[0])):
+        arg_parser.error("""
     --filename argument cannot be used when storing a directory or
     multiple files.
     """)
@@ -525,6 +525,9 @@ class ArvPutUploadJob(object):
                 self._write_stdin(self.filename or 'stdin')
             elif not os.path.exists(path):
                  raise PathDoesNotExistError(u"file or directory '{}' does not exist.".format(path))
+            elif (not self.follow_links) and os.path.islink(path):
+                self.logger.warning("Skipping symlink '{}'".format(path))
+                continue
             elif os.path.isdir(path):
                 # Use absolute paths on cache index so CWD doesn't interfere
                 # with the caching logic.
@@ -659,6 +662,9 @@ class ArvPutUploadJob(object):
             self._remote_collection.save(num_retries=self.num_retries,
                                          trash_at=self._collection_trash_at())
         else:
+            if len(self._local_collection) == 0:
+                self.logger.warning("No files were uploaded, skipping collection creation.")
+                return
             self._local_collection.save_new(
                 name=self.name, owner_uuid=self.owner_uuid,
                 ensure_unique_name=self.ensure_unique_name,
@@ -1310,7 +1316,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
             output = writer.manifest_text()
     elif args.raw:
         output = ','.join(writer.data_locators())
-    else:
+    elif writer.manifest_locator() is not None:
         try:
             expiration_notice = ""
             if writer.collection_trash_at() is not None:
@@ -1336,6 +1342,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
                 "arv-put: Error creating Collection on project: {}.".format(
                     error))
             status = 1
+    else:
+        status = 1
 
     # Print the locator (uuid) of the new collection.
     if output is None:
index caa03a3e03c02692204f3e707fe31df7a97961ad..fac970c95a6fdb13ecbef709ff850798c67840a5 100644 (file)
@@ -295,6 +295,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
         shutil.rmtree(self.tempdir_with_symlink)
 
     def test_symlinks_are_followed_by_default(self):
+        self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+        self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile')))
         cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
         cwriter.start(save_collection=False)
         self.assertIn('linkeddir', cwriter.manifest_text())
@@ -302,12 +304,29 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
         cwriter.destroy_cache()
 
     def test_symlinks_are_not_followed_when_requested(self):
+        self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+        self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile')))
         cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
                                           follow_links=False)
         cwriter.start(save_collection=False)
         self.assertNotIn('linkeddir', cwriter.manifest_text())
         self.assertNotIn('linkedfile', cwriter.manifest_text())
         cwriter.destroy_cache()
+        # Check for bug #17800: passed symlinks should also be ignored.
+        linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir')
+        cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False)
+        cwriter.start(save_collection=False)
+        self.assertNotIn('linkeddir', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+    def test_no_empty_collection_saved(self):
+        self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+        linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir')
+        cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False)
+        cwriter.start(save_collection=True)
+        self.assertIsNone(cwriter.manifest_locator())
+        self.assertEqual('', cwriter.manifest_text())
+        cwriter.destroy_cache()
 
     def test_passing_nonexistant_path_raise_exception(self):
         uuid_str = str(uuid.uuid4())
index 446d591bfd715224651c1d9667e0c451e81f664e..8715ab24f35c0312fcca8152dc464ab60aa582af 100644 (file)
@@ -32,6 +32,10 @@ import (
 
 var _ = check.Suite(&UnitSuite{})
 
+func init() {
+       arvados.DebugLocksPanicMode = true
+}
+
 type UnitSuite struct {
        Config *arvados.Config
 }
index f03ff01b8127d6ee1d1056ee72bee5bd6507d26b..6ea9bf9f7a8383cc10ed82e108b76bb3ca97585b 100644 (file)
@@ -136,15 +136,39 @@ func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string,
                }
        }
 
-       normalizedURL := *r.URL
-       normalizedURL.RawPath = ""
-       normalizedURL.Path = reMultipleSlashChars.ReplaceAllString(normalizedURL.Path, "/")
-       ctxlog.FromContext(r.Context()).Infof("escapedPath %s", normalizedURL.EscapedPath())
-       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedURL.EscapedPath(), s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
+       normalizedPath := normalizePath(r.URL.Path)
+       ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
+       canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
        ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
        return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
 }
 
+func normalizePath(s string) string {
+       // (url.URL).EscapedPath() would be incorrect here. AWS
+       // documentation specifies the URL path should be normalized
+       // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
+       // / "." / "_" / "~". The implication is that everything other
+       // than those chars (and "/") _must_ be percent-encoded --
+       // even chars like ";" and "," that are not normally
+       // percent-encoded in paths.
+       out := ""
+       for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) {
+               if (c >= 'a' && c <= 'z') ||
+                       (c >= 'A' && c <= 'Z') ||
+                       (c >= '0' && c <= '9') ||
+                       c == '-' ||
+                       c == '.' ||
+                       c == '_' ||
+                       c == '~' ||
+                       c == '/' {
+                       out += string(c)
+               } else {
+                       out += fmt.Sprintf("%%%02X", c)
+               }
+       }
+       return out
+}
+
 func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
        // scope is {datestamp}/{region}/{service}/aws4_request
        drs := strings.Split(scope, "/")
index 4f70168b5629ecfbdb06193d75344cb4e27673eb..f411fde871cdba0cc9bbb2584be9a9bb878c6c1e 100644 (file)
@@ -558,12 +558,15 @@ func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) {
                rawPath        string
                normalizedPath string
        }{
-               {"/foo", "/foo"},             // boring case
-               {"/foo%5fbar", "/foo_bar"},   // _ must not be escaped
-               {"/foo%2fbar", "/foo/bar"},   // / must not be escaped
-               {"/(foo)", "/%28foo%29"},     // () must be escaped
-               {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase
+               {"/foo", "/foo"},                           // boring case
+               {"/foo%5fbar", "/foo_bar"},                 // _ must not be escaped
+               {"/foo%2fbar", "/foo/bar"},                 // / must not be escaped
+               {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
+               {"/foo%5bbar", "/foo%5Bbar"},               // %XX must be uppercase
+               {"//foo///.bar", "/foo/.bar"},              // "//" and "///" must be squashed to "/"
        } {
+               c.Logf("trial %q", trial)
+
                date := time.Now().UTC().Format("20060102T150405Z")
                scope := "20200202/zzzzz/S3/aws4_request"
                canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
@@ -1098,6 +1101,15 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) {
        buf, err := cmd.CombinedOutput()
        c.Check(err, check.IsNil)
        c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
+
+       // This tests whether s3cmd's path normalization agrees with
+       // keep-web's signature verification wrt chars like "|"
+       // (neither reserved nor unreserved) and "," (not normally
+       // percent-encoded in a path).
+       cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar")
+       buf, err = cmd.CombinedOutput()
+       c.Check(err, check.NotNil)
+       c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`)
 }
 
 func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
index c1af511ad464f5a447a4552e8181dcfff23ffefc..7faf5c2fb554eec71400107ca80f6087970e0550 100755 (executable)
@@ -135,7 +135,7 @@ VERSION="latest"
 
 # The arvados-formula version.  For a stable release, this should be a
 # branch name (e.g. X.Y-dev) or tag for the release.
-ARVADOS_TAG="master"
+ARVADOS_TAG="main"
 
 # Other formula versions we depend on
 POSTGRES_TAG="v0.41.6"