X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/cc8cffec8e1c612b6be03f4446ab6beebf479f5b..b041a675c577e174680913e0da0bf69b1cca83b6:/services/keep-web/s3_test.go diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index 51ac5ba98f..b25ef972dc 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -2,14 +2,18 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepweb import ( "bytes" + "context" "crypto/rand" + "crypto/sha256" "fmt" "io/ioutil" "net/http" + "net/http/httptest" + "net/url" "os" "os/exec" "strings" @@ -22,6 +26,10 @@ import ( "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/AdRoll/goamz/aws" "github.com/AdRoll/goamz/s3" + aws_aws "github.com/aws/aws-sdk-go/aws" + aws_credentials "github.com/aws/aws-sdk-go/aws/credentials" + aws_session "github.com/aws/aws-sdk-go/aws/session" + aws_s3 "github.com/aws/aws-sdk-go/service/s3" check "gopkg.in/check.v1" ) @@ -31,12 +39,13 @@ type s3stage struct { kc *keepclient.KeepClient proj arvados.Group projbucket *s3.Bucket + subproj arvados.Group coll arvados.Collection collbucket *s3.Bucket } func (s *IntegrationSuite) s3setup(c *check.C) s3stage { - var proj arvados.Group + var proj, subproj arvados.Group var coll arvados.Collection arv := arvados.NewClientFromEnv() arv.AuthToken = arvadostest.ActiveToken @@ -44,14 +53,34 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { "group": map[string]interface{}{ "group_class": "project", "name": "keep-web s3 test", + "properties": map[string]interface{}{ + "project-properties-key": "project properties value", + }, }, "ensure_unique_name": true, }) c.Assert(err, check.IsNil) + err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{ + "group": map[string]interface{}{ + "owner_uuid": proj.UUID, + "group_class": "project", + "name": "keep-web s3 test subproject", + "properties": map[string]interface{}{ + "subproject_properties_key": "subproject 
properties value", + "invalid header key": "this value will not be returned because key contains spaces", + }, + }, + }) + c.Assert(err, check.IsNil) err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{ "owner_uuid": proj.UUID, "name": "keep-web s3 test collection", "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n", + "properties": map[string]interface{}{ + "string": "string value", + "array": []string{"element1", "element2"}, + "object": map[string]interface{}{"key": map[string]interface{}{"key2": "value"}}, + }, }}) c.Assert(err, check.IsNil) ac, err := arvadosclient.New(arv) @@ -73,8 +102,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour)) region := aws.Region{ - Name: s.testServer.Addr, - S3Endpoint: "http://" + s.testServer.Addr, + Name: "zzzzz", + S3Endpoint: s.testServer.URL, } client := s3.New(*auth, region) client.Signature = aws.V4Signature @@ -87,7 +116,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { S3: client, Name: proj.UUID, }, - coll: coll, + subproj: subproj, + coll: coll, collbucket: &s3.Bucket{ S3: client, Name: coll.UUID, @@ -118,11 +148,15 @@ func (s *IntegrationSuite) TestS3Signatures(c *check.C) { secretkey string }{ {true, aws.V2Signature, arvadostest.ActiveToken, "none"}, + {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"}, + {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"}, {false, aws.V2Signature, "none", "none"}, {false, aws.V2Signature, "none", arvadostest.ActiveToken}, {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken}, {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken}, + {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), 
url.QueryEscape(arvadostest.ActiveTokenV2)}, + {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)}, {false, aws.V4Signature, arvadostest.ActiveToken, ""}, {false, aws.V4Signature, arvadostest.ActiveToken, "none"}, {false, aws.V4Signature, "none", arvadostest.ActiveToken}, @@ -173,7 +207,9 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix // GetObject rdr, err = bucket.GetReader(prefix + "missingfile") - c.Check(err, check.ErrorMatches, `404 Not Found`) + c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) + c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) + c.Check(err, check.ErrorMatches, `The specified key does not exist.`) // HeadObject exists, err := bucket.Exists(prefix + "missingfile") @@ -194,6 +230,77 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix c.Check(err, check.IsNil) c.Check(resp.StatusCode, check.Equals, http.StatusOK) c.Check(resp.ContentLength, check.Equals, int64(4)) + + // HeadObject with superfluous leading slashes + exists, err = bucket.Exists(prefix + "//sailboat.txt") + c.Check(err, check.IsNil) + c.Check(exists, check.Equals, true) +} + +func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) { + got := map[string]string{} + for hk, hv := range hdr { + if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 { + got[k] = hv[0] + } + } + c.Check(got, check.DeepEquals, expect) +} + +func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + expectCollectionTags := map[string]string{ + "String": "string value", + "Array": `["element1","element2"]`, + "Object": `{"key":{"key2":"value"}}`, + } + expectSubprojectTags := map[string]string{ + "Subproject_properties_key": "subproject properties value", + } + expectProjectTags := map[string]string{ + 
"Project-Properties-Key": "project properties value", + } + + c.Log("HEAD object with metadata from collection") + resp, err := stage.collbucket.Head("sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("GET object with metadata from collection") + rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt") + c.Assert(err, check.IsNil) + content, err := ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + rdr.Close() + c.Check(content, check.HasLen, 4) + s.checkMetaEquals(c, hdr, expectCollectionTags) + + c.Log("HEAD bucket with metadata from collection") + resp, err = stage.collbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD directory placeholder with metadata from collection") + resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD file with metadata from collection") + resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD directory placeholder with metadata from subproject") + resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectSubprojectTags) + + c.Log("HEAD bucket with metadata from project") + resp, err = stage.projbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectProjectTags) } func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { @@ -220,6 +327,18 @@ func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, path: "newdir/newfile", size: 1 << 26, contentType: "application/octet-stream", + }, { + path: "/aaa", + size: 2, + contentType: "application/octet-stream", + }, { + path: "//bbb", + 
size: 2, + contentType: "application/octet-stream", + }, { + path: "ccc//", + size: 0, + contentType: "application/x-directory", }, { path: "newdir1/newdir2/newfile", size: 0, @@ -235,7 +354,14 @@ func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, objname := prefix + trial.path _, err := bucket.GetReader(objname) - c.Assert(err, check.ErrorMatches, `404 Not Found`) + if !c.Check(err, check.NotNil) { + continue + } + c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) + c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) + if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) { + continue + } buf := make([]byte, trial.size) rand.Read(buf) @@ -244,7 +370,7 @@ func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, c.Check(err, check.IsNil) rdr, err := bucket.GetReader(objname) - if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects { + if strings.HasSuffix(trial.path, "/") && !s.handler.Cluster.Collections.S3FolderObjects { c.Check(err, check.NotNil) continue } else if !c.Check(err, check.IsNil) { @@ -284,16 +410,22 @@ func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) { c.Logf("=== %v", trial) _, err := bucket.GetReader(trial.path) - c.Assert(err, check.ErrorMatches, `404 Not Found`) + c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) + c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) + c.Assert(err, check.ErrorMatches, `The specified key does not exist.`) buf := make([]byte, trial.size) rand.Read(buf) err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{}) - c.Check(err, check.ErrorMatches, `400 Bad Request`) + c.Check(err.(*s3.Error).StatusCode, check.Equals, 400) + c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`) + c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open 
"/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid (argument|operation)`) _, err = bucket.GetReader(trial.path) - c.Assert(err, check.ErrorMatches, `404 Not Found`) + c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) + c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) + c.Assert(err, check.ErrorMatches, `The specified key does not exist.`) } } @@ -308,7 +440,7 @@ func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) { s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) { - s.testServer.Config.cluster.Collections.S3FolderObjects = true + s.handler.Cluster.Collections.S3FolderObjects = true for _, trial := range []struct { path string }{ @@ -345,15 +477,7 @@ func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) { s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) { - s.testServer.Config.cluster.Collections.S3FolderObjects = false - - // Can't use V4 signature for these tests, because - // double-slash is incorrectly cleaned by the aws.V4Signature, - // resulting in a "bad signature" error. (Cleaning the path is - // appropriate for other services, but not in S3 where object - // names "foo//bar" and "foo/bar" are semantically different.) 
- bucket.S3.Auth = *(aws.NewAuth(arvadostest.ActiveToken, "none", "", time.Now().Add(time.Hour))) - bucket.S3.Signature = aws.V2Signature + s.handler.Cluster.Collections.S3FolderObjects = false var wg sync.WaitGroup for _, trial := range []struct { @@ -377,8 +501,6 @@ func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, path: "/", }, { path: "//", - }, { - path: "foo//bar", }, { path: "", }, @@ -395,13 +517,15 @@ func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, rand.Read(buf) err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{}) - if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) { + if !c.Check(err, check.ErrorMatches, `(invalid object name.*|open ".*" failed.*|object name conflicts with existing object|Missing object name in PUT request.)`, check.Commentf("PUT %q should fail", objname)) { return } if objname != "" && objname != "/" { _, err = bucket.GetReader(objname) - c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname)) + c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) + c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) + c.Check(err, check.ErrorMatches, `The specified key does not exist.`, check.Commentf("GET %q should return 404", objname)) } }() } @@ -423,6 +547,151 @@ func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) { c.Assert(fs.Sync(), check.IsNil) } +func (s *IntegrationSuite) sign(c *check.C, req *http.Request, key, secret string) { + scope := "20200202/zzzzz/service/aws4_request" + signedHeaders := "date" + req.Header.Set("Date", time.Now().UTC().Format(time.RFC1123)) + stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, req) + c.Assert(err, check.IsNil) + sig, err := s3signature(secret, scope, signedHeaders, stringToSign) + c.Assert(err, check.IsNil) + 
req.Header.Set("Authorization", s3SignAlgorithm+" Credential="+key+"/"+scope+", SignedHeaders="+signedHeaders+", Signature="+sig) +} + +func (s *IntegrationSuite) TestS3VirtualHostStyleRequests(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + for _, trial := range []struct { + url string + method string + body string + responseCode int + responseRegexp []string + }{ + { + url: "https://" + stage.collbucket.Name + ".example.com/", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, + }, + { + url: "https://" + strings.Replace(stage.coll.PortableDataHash, "+", "-", -1) + ".example.com/", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/?prefix=" + stage.coll.Name + "/&delimiter=/", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/sailboat.txt", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: []string{`⛵\n`}, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep", + method: "PUT", + body: "boop", + responseCode: http.StatusOK, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: []string{`boop`}, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", + method: "GET", + responseCode: http.StatusNotFound, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", + method: "PUT", + body: "boop", + responseCode: http.StatusOK, + }, + { + url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", + method: "GET", + responseCode: http.StatusOK, + responseRegexp: 
[]string{`boop`}, + }, + } { + url, err := url.Parse(trial.url) + c.Assert(err, check.IsNil) + req, err := http.NewRequest(trial.method, url.String(), bytes.NewReader([]byte(trial.body))) + c.Assert(err, check.IsNil) + s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken) + rr := httptest.NewRecorder() + s.handler.ServeHTTP(rr, req) + resp := rr.Result() + c.Check(resp.StatusCode, check.Equals, trial.responseCode) + body, err := ioutil.ReadAll(resp.Body) + c.Assert(err, check.IsNil) + for _, re := range trial.responseRegexp { + c.Check(string(body), check.Matches, re) + } + } +} + +func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + for _, trial := range []struct { + rawPath string + normalizedPath string + }{ + {"/foo", "/foo"}, // boring case + {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped + {"/foo%2fbar", "/foo/bar"}, // / must not be escaped + {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped + {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase + {"//foo///.bar", "/foo/.bar"}, // "//" and "///" must be squashed to "/" + } { + c.Logf("trial %q", trial) + + date := time.Now().UTC().Format("20060102T150405Z") + scope := "20200202/zzzzz/S3/aws4_request" + canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "") + c.Logf("canonicalRequest %q", canonicalRequest) + expect := fmt.Sprintf("%s\n%s\n%s\n%s", s3SignAlgorithm, date, scope, hashdigest(sha256.New(), canonicalRequest)) + c.Logf("expected stringToSign %q", expect) + + req, err := http.NewRequest("GET", "https://host.example.com"+trial.rawPath, nil) + req.Header.Set("X-Amz-Date", date) + req.Host = "host.example.com" + c.Assert(err, check.IsNil) + + obtained, err := s3stringToSign(s3SignAlgorithm, scope, "host", req) + if !c.Check(err, check.IsNil) { + continue + } + c.Check(obtained, check.Equals, expect) + } +} + +func (s 
*IntegrationSuite) TestS3GetBucketLocation(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { + req, err := http.NewRequest("GET", bucket.URL("/"), nil) + c.Check(err, check.IsNil) + req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") + req.URL.RawQuery = "location" + resp, err := http.DefaultClient.Do(req) + c.Assert(err, check.IsNil) + c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") + buf, err := ioutil.ReadAll(resp.Body) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Equals, "\nzzzzz\n") + } +} + func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) @@ -440,6 +709,37 @@ func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) { } } +func (s *IntegrationSuite) TestS3UnsupportedAPIs(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + for _, trial := range []struct { + method string + path string + rawquery string + }{ + {"GET", "/", "acl&versionId=1234"}, // GetBucketAcl + {"GET", "/foo", "acl&versionId=1234"}, // GetObjectAcl + {"PUT", "/", "acl"}, // PutBucketAcl + {"PUT", "/foo", "acl"}, // PutObjectAcl + {"DELETE", "/", "tagging"}, // DeleteBucketTagging + {"DELETE", "/foo", "tagging"}, // DeleteObjectTagging + } { + for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { + c.Logf("trial %v bucket %v", trial, bucket) + req, err := http.NewRequest(trial.method, bucket.URL(trial.path), nil) + c.Check(err, check.IsNil) + req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") + req.URL.RawQuery = trial.rawquery + resp, err := http.DefaultClient.Do(req) + c.Assert(err, check.IsNil) + c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") + buf, err := ioutil.ReadAll(resp.Body) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Matches, "(?ms).*InvalidRequest.*API not 
supported.*") + } + } +} + // If there are no CommonPrefixes entries, the CommonPrefixes XML tag // should not appear at all. func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) { @@ -477,12 +777,28 @@ func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) { } } +// List response should include KeyCount field. +func (s *IntegrationSuite) TestS3ListKeyCount(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil) + c.Assert(err, check.IsNil) + req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") + req.URL.RawQuery = "prefix=&delimiter=/" + resp, err := http.DefaultClient.Do(req) + c.Assert(err, check.IsNil) + buf, err := ioutil.ReadAll(resp.Body) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Matches, `(?ms).*<KeyCount>2</KeyCount>.*`) +} + func (s *IntegrationSuite) TestS3CollectionList(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) var markers int - for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { + for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} { dirs := 2 filesPerDir := 1001 stage.writeBigDirs(c, dirs, filesPerDir) @@ -497,7 +813,7 @@ func (s *IntegrationSuite) TestS3CollectionList(c *check.C) { } } func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) { - c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects) + c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.handler.Cluster.Collections.S3FolderObjects) expectPageSize := pageSize if expectPageSize > 1000 { expectPageSize = 1000 @@ -533,7 +849,7 @@ func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix stri } func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) { - for _, 
s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { + for _, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} { s.testS3CollectionListRollup(c) } } @@ -562,7 +878,7 @@ func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) { } } markers := 0 - if s.testServer.Config.cluster.Collections.S3FolderObjects { + if s.handler.Cluster.Collections.S3FolderObjects { markers = 1 } c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers) @@ -666,6 +982,196 @@ func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) { } } +func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + dirs := 2 + filesPerDir := 40 + stage.writeBigDirs(c, dirs, filesPerDir) + + sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{ + Region: aws_aws.String("auto"), + Endpoint: aws_aws.String(s.testServer.URL), + Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""), + S3ForcePathStyle: aws_aws.Bool(true), + })) + + stringOrNil := func(s string) *string { + if s == "" { + return nil + } else { + return &s + } + } + + client := aws_s3.New(sess) + ctx := context.Background() + + for _, trial := range []struct { + prefix string + delimiter string + startAfter string + maxKeys int + expectKeys int + expectCommonPrefixes map[string]bool + }{ + { + // Expect {filesPerDir plus the dir itself} + // for each dir, plus emptydir, emptyfile, and + // sailboat.txt. + expectKeys: (filesPerDir+1)*dirs + 3, + }, + { + maxKeys: 15, + expectKeys: (filesPerDir+1)*dirs + 3, + }, + { + startAfter: "dir0/z", + maxKeys: 15, + // Expect {filesPerDir plus the dir itself} + // for each dir except dir0, plus emptydir, + // emptyfile, and sailboat.txt. 
+ expectKeys: (filesPerDir+1)*(dirs-1) + 3, + }, + { + maxKeys: 1, + delimiter: "/", + expectKeys: 2, // emptyfile, sailboat.txt + expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true}, + }, + { + startAfter: "dir0/z", + maxKeys: 15, + delimiter: "/", + expectKeys: 2, // emptyfile, sailboat.txt + expectCommonPrefixes: map[string]bool{"dir1/": true, "emptydir/": true}, + }, + { + startAfter: "dir0/file10.txt", + maxKeys: 15, + delimiter: "/", + expectKeys: 2, + expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true}, + }, + { + startAfter: "dir0/file10.txt", + maxKeys: 15, + prefix: "d", + delimiter: "/", + expectKeys: 0, + expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true}, + }, + } { + c.Logf("[trial %+v]", trial) + params := aws_s3.ListObjectsV2Input{ + Bucket: aws_aws.String(stage.collbucket.Name), + Prefix: stringOrNil(trial.prefix), + Delimiter: stringOrNil(trial.delimiter), + StartAfter: stringOrNil(trial.startAfter), + MaxKeys: aws_aws.Int64(int64(trial.maxKeys)), + } + keySeen := map[string]bool{} + prefixSeen := map[string]bool{} + for { + result, err := client.ListObjectsV2WithContext(ctx, &params) + if !c.Check(err, check.IsNil) { + break + } + c.Check(result.Name, check.DeepEquals, aws_aws.String(stage.collbucket.Name)) + c.Check(result.Prefix, check.DeepEquals, aws_aws.String(trial.prefix)) + c.Check(result.Delimiter, check.DeepEquals, aws_aws.String(trial.delimiter)) + // The following two fields are expected to be + // nil (i.e., no tag in XML response) rather + // than "" when the corresponding request + // field was empty or nil. 
+ c.Check(result.StartAfter, check.DeepEquals, stringOrNil(trial.startAfter)) + c.Check(result.ContinuationToken, check.DeepEquals, params.ContinuationToken) + + if trial.maxKeys > 0 { + c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(trial.maxKeys))) + c.Check(len(result.Contents)+len(result.CommonPrefixes) <= trial.maxKeys, check.Equals, true) + } else { + c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(s3MaxKeys))) + } + + for _, ent := range result.Contents { + c.Assert(ent.Key, check.NotNil) + c.Check(*ent.Key > trial.startAfter, check.Equals, true) + c.Check(keySeen[*ent.Key], check.Equals, false, check.Commentf("dup key %q", *ent.Key)) + keySeen[*ent.Key] = true + } + for _, ent := range result.CommonPrefixes { + c.Assert(ent.Prefix, check.NotNil) + c.Check(strings.HasSuffix(*ent.Prefix, trial.delimiter), check.Equals, true, check.Commentf("bad CommonPrefix %q", *ent.Prefix)) + if strings.HasPrefix(trial.startAfter, *ent.Prefix) { + // If we asked for + // startAfter=dir0/file10.txt, + // we expect dir0/ to be + // returned as a common prefix + } else { + c.Check(*ent.Prefix > trial.startAfter, check.Equals, true) + } + c.Check(prefixSeen[*ent.Prefix], check.Equals, false, check.Commentf("dup common prefix %q", *ent.Prefix)) + prefixSeen[*ent.Prefix] = true + } + if *result.IsTruncated && c.Check(result.NextContinuationToken, check.Not(check.Equals), "") { + params.ContinuationToken = aws_aws.String(*result.NextContinuationToken) + } else { + break + } + } + c.Check(keySeen, check.HasLen, trial.expectKeys) + c.Check(prefixSeen, check.HasLen, len(trial.expectCommonPrefixes)) + if len(trial.expectCommonPrefixes) > 0 { + c.Check(prefixSeen, check.DeepEquals, trial.expectCommonPrefixes) + } + } +} + +func (s *IntegrationSuite) TestS3ListObjectsV2EncodingTypeURL(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + dirs := 2 + filesPerDir := 40 + stage.writeBigDirs(c, dirs, filesPerDir) + + sess := 
aws_session.Must(aws_session.NewSession(&aws_aws.Config{ + Region: aws_aws.String("auto"), + Endpoint: aws_aws.String(s.testServer.URL), + Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""), + S3ForcePathStyle: aws_aws.Bool(true), + })) + + client := aws_s3.New(sess) + ctx := context.Background() + + result, err := client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{ + Bucket: aws_aws.String(stage.collbucket.Name), + Prefix: aws_aws.String("dir0/"), + Delimiter: aws_aws.String("/"), + StartAfter: aws_aws.String("dir0/"), + EncodingType: aws_aws.String("url"), + }) + c.Assert(err, check.IsNil) + c.Check(*result.Prefix, check.Equals, "dir0%2F") + c.Check(*result.Delimiter, check.Equals, "%2F") + c.Check(*result.StartAfter, check.Equals, "dir0%2F") + for _, ent := range result.Contents { + c.Check(*ent.Key, check.Matches, "dir0%2F.*") + } + result, err = client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{ + Bucket: aws_aws.String(stage.collbucket.Name), + Delimiter: aws_aws.String("/"), + EncodingType: aws_aws.String("url"), + }) + c.Assert(err, check.IsNil) + c.Check(*result.Delimiter, check.Equals, "%2F") + c.Check(result.CommonPrefixes, check.HasLen, dirs+1) + for _, ent := range result.CommonPrefixes { + c.Check(*ent.Prefix, check.Matches, ".*%2F") + } +} + // TestS3cmd checks compatibility with the s3cmd command line tool, if // it's installed. 
As of Debian buster, s3cmd is only in backports, so // `arvados-server install` don't install it, and this test skips if @@ -679,8 +1185,27 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) - cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection) + cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection) buf, err := cmd.CombinedOutput() c.Check(err, check.IsNil) c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`) + + // This tests whether s3cmd's path normalization agrees with + // keep-web's signature verification wrt chars like "|" + // (neither reserved nor unreserved) and "," (not normally + // percent-encoded in a path). + tmpfile := c.MkDir() + "/dstfile" + cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar", tmpfile) + buf, err = cmd.CombinedOutput() + c.Check(err, check.NotNil) + c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`) +} + +func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt") + c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`) + c.Check(body, check.Equals, "⛵\n") }