17009: Support accessing S3 with virtual hosted-style URLs.
author Tom Clegg <tom@tomclegg.ca>
Wed, 18 Nov 2020 22:35:29 +0000 (17:35 -0500)
committer Nico Cesar <nico@nicocesar.com>
Thu, 10 Dec 2020 20:27:10 +0000 (15:27 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

doc/api/keep-s3.html.textile.liquid
services/keep-web/s3.go
services/keep-web/s3_test.go
services/keep-web/server_test.go

index 2cae817613699a4ba08467742c736e4827fa058e..d5ad1dc60269cab69b45cd9f7b756904ce644fac 100644 (file)
@@ -21,7 +21,11 @@ To access Arvados S3 using an S3 client library, you must tell it to use the URL
 
 The "bucket name" is an Arvados collection uuid, portable data hash, or project uuid.
 
 
 The "bucket name" is an Arvados collection uuid, portable data hash, or project uuid.
 
-The bucket name must be encoded as the first path segment of every request.  This is what the S3 documentation calls "Path-Style Requests".
+Both path-style and virtual hosted-style requests are supported.
+* A path-style request uses the hostname indicated by @Services.WebDAVDownload.ExternalURL@, with the bucket name in the first path segment: @https://download.example.com/zzzzz-4zz18-asdfgasdfgasdfg/@.
+* A virtual hosted-style request uses the hostname pattern indicated by @Services.WebDAV.ExternalURL@, with the bucket name in place of the leading @*@: @https://zzzzz-4zz18-asdfgasdfgasdfg.collections.example.com/@.
+
+If you have wildcard DNS, TLS, and routing set up, an S3 client configured with endpoint @collections.example.com@ should work regardless of which request style it uses.
 
 h3. Supported Operations
 
 
 h3. Supported Operations
 
index 49fb2456f5851662bec9573af6e06978d930d741..57c9d7efb30a88bc7b5d69049087d91f9aa3c5e9 100644 (file)
@@ -205,7 +205,15 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
 
        fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
 
-       objectNameGiven := strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+       var objectNameGiven bool
+       fspath := "/by_id"
+       if id := parseCollectionIDFromDNSName(r.Host); id != "" {
+               fspath += "/" + id
+               objectNameGiven = true
+       } else {
+               objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+       }
+       fspath += r.URL.Path
 
        switch {
        case r.Method == http.MethodGet && !objectNameGiven:
 
        switch {
        case r.Method == http.MethodGet && !objectNameGiven:
@@ -221,7 +229,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                }
                return true
        case r.Method == http.MethodGet || r.Method == http.MethodHead:
                }
                return true
        case r.Method == http.MethodGet || r.Method == http.MethodHead:
-               fspath := "/by_id" + r.URL.Path
                fi, err := fs.Stat(fspath)
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                fi, err := fs.Stat(fspath)
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
@@ -255,7 +262,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        http.Error(w, "missing object name in PUT request", http.StatusBadRequest)
                        return true
                }
                        http.Error(w, "missing object name in PUT request", http.StatusBadRequest)
                        return true
                }
-               fspath := "by_id" + r.URL.Path
                var objectIsDir bool
                if strings.HasSuffix(fspath, "/") {
                        if !h.Config.cluster.Collections.S3FolderObjects {
                var objectIsDir bool
                if strings.HasSuffix(fspath, "/") {
                        if !h.Config.cluster.Collections.S3FolderObjects {
@@ -350,7 +356,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        http.Error(w, "missing object name in DELETE request", http.StatusBadRequest)
                        return true
                }
                        http.Error(w, "missing object name in DELETE request", http.StatusBadRequest)
                        return true
                }
-               fspath := "by_id" + r.URL.Path
                if strings.HasSuffix(fspath, "/") {
                        fspath = strings.TrimSuffix(fspath, "/")
                        fi, err := fs.Stat(fspath)
                if strings.HasSuffix(fspath, "/") {
                        fspath = strings.TrimSuffix(fspath, "/")
                        fi, err := fs.Stat(fspath)
index 786e68afec4ca197980b56270e3f0bc66ab7494d..f8dc60086123a53f15f4c3e60d646e6041cfdf76 100644 (file)
@@ -700,3 +700,12 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
 }
        c.Check(err, check.IsNil)
        c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
 }
+
+func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt")
+       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+       c.Check(body, check.Equals, "⛵\n")
+}
index acdc11b305335fd25afe3fed4c27122c3488c84f..43817b51fcc78adaefe6525798b8ef400dbe2512 100644 (file)
@@ -257,12 +257,16 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
+func (s *IntegrationSuite) runCurl(c *check.C, auth, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
        curlArgs := []string{"--silent", "--show-error", "--include"}
        testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
        curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
        curlArgs := []string{"--silent", "--show-error", "--include"}
        testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
        curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
-       if token != "" {
-               curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+       if strings.Contains(auth, " ") {
+               // caller supplied entire Authorization header value
+               curlArgs = append(curlArgs, "-H", "Authorization: "+auth)
+       } else if auth != "" {
+               // caller supplied Arvados token
+               curlArgs = append(curlArgs, "-H", "Authorization: Bearer "+auth)
        }
        curlArgs = append(curlArgs, args...)
        curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
        }
        curlArgs = append(curlArgs, args...)
        curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)