// Copyright (C) The Arvados Authors. All rights reserved. // // SPDX-License-Identifier: AGPL-3.0 package main import ( "bytes" "crypto/rand" "crypto/sha256" "fmt" "io/ioutil" "net/http" "net/http/httptest" "net/url" "os" "os/exec" "strings" "sync" "time" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/arvadostest" "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/AdRoll/goamz/aws" "github.com/AdRoll/goamz/s3" check "gopkg.in/check.v1" ) type s3stage struct { arv *arvados.Client ac *arvadosclient.ArvadosClient kc *keepclient.KeepClient proj arvados.Group projbucket *s3.Bucket coll arvados.Collection collbucket *s3.Bucket } func (s *IntegrationSuite) s3setup(c *check.C) s3stage { var proj arvados.Group var coll arvados.Collection arv := arvados.NewClientFromEnv() arv.AuthToken = arvadostest.ActiveToken err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{ "group": map[string]interface{}{ "group_class": "project", "name": "keep-web s3 test", }, "ensure_unique_name": true, }) c.Assert(err, check.IsNil) err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{ "owner_uuid": proj.UUID, "name": "keep-web s3 test collection", "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n", }}) c.Assert(err, check.IsNil) ac, err := arvadosclient.New(arv) c.Assert(err, check.IsNil) kc, err := keepclient.MakeKeepClient(ac) c.Assert(err, check.IsNil) fs, err := coll.FileSystem(arv, kc) c.Assert(err, check.IsNil) f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644) c.Assert(err, check.IsNil) _, err = f.Write([]byte("⛵\n")) c.Assert(err, check.IsNil) err = f.Close() c.Assert(err, check.IsNil) err = fs.Sync() c.Assert(err, check.IsNil) err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil) c.Assert(err, check.IsNil) auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour)) region := aws.Region{ Name: "zzzzz", S3Endpoint: "http://" + s.testServer.Addr, } client := s3.New(*auth, region) client.Signature = aws.V4Signature return s3stage{ arv: arv, ac: ac, kc: kc, proj: proj, projbucket: &s3.Bucket{ S3: client, Name: proj.UUID, }, coll: coll, collbucket: &s3.Bucket{ S3: client, Name: coll.UUID, }, } } func (stage s3stage) teardown(c *check.C) { if stage.coll.UUID != "" { err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil) c.Check(err, check.IsNil) } if stage.proj.UUID != "" { err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil) c.Check(err, check.IsNil) } } func (s *IntegrationSuite) TestS3Signatures(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) bucket := stage.collbucket for _, trial := range []struct { success bool signature int accesskey string secretkey string }{ {true, aws.V2Signature, arvadostest.ActiveToken, "none"}, {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"}, {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"}, {false, aws.V2Signature, "none", "none"}, {false, aws.V2Signature, "none", arvadostest.ActiveToken}, {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken}, {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken}, {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2)}, {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)}, {false, aws.V4Signature, arvadostest.ActiveToken, ""}, {false, aws.V4Signature, arvadostest.ActiveToken, "none"}, {false, aws.V4Signature, "none", arvadostest.ActiveToken}, {false, aws.V4Signature, "none", "none"}, } { c.Logf("%#v", trial) bucket.S3.Auth = *(aws.NewAuth(trial.accesskey, trial.secretkey, "", time.Now().Add(time.Hour))) bucket.S3.Signature = trial.signature _, err := bucket.GetReader("emptyfile") if trial.success { c.Check(err, check.IsNil) } else { c.Check(err, check.NotNil) } } } func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { c.Logf("bucket %s", bucket.Name) exists, err := bucket.Exists("") c.Check(err, check.IsNil) c.Check(exists, check.Equals, true) } } func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3GetObject(c, stage.collbucket, "") } func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) { rdr, err := bucket.GetReader(prefix + "emptyfile") c.Assert(err, check.IsNil) buf, err := ioutil.ReadAll(rdr) c.Check(err, check.IsNil) c.Check(len(buf), check.Equals, 0) err = rdr.Close() c.Check(err, check.IsNil) // GetObject rdr, err = bucket.GetReader(prefix + "missingfile") c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) c.Check(err, check.ErrorMatches, `The specified key does not exist.`) // HeadObject exists, err := bucket.Exists(prefix + "missingfile") c.Check(err, check.IsNil) c.Check(exists, check.Equals, false) // GetObject rdr, err = bucket.GetReader(prefix + "sailboat.txt") c.Assert(err, check.IsNil) buf, err = ioutil.ReadAll(rdr) c.Check(err, check.IsNil) c.Check(buf, check.DeepEquals, []byte("⛵\n")) err = rdr.Close() c.Check(err, check.IsNil) // HeadObject resp, err := bucket.Head(prefix+"sailboat.txt", nil) c.Check(err, check.IsNil) c.Check(resp.StatusCode, check.Equals, http.StatusOK) c.Check(resp.ContentLength, check.Equals, int64(4)) // HeadObject with superfluous leading slashes exists, err = bucket.Exists(prefix + "//sailboat.txt") c.Check(err, check.IsNil) c.Check(exists, check.Equals, true) } func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3PutObjectSuccess(c, stage.collbucket, "") } func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) { for _, trial := range []struct { path string size int contentType string }{ { path: "newfile", size: 128000000, contentType: "application/octet-stream", }, { path: "newdir/newfile", size: 1 << 26, contentType: "application/octet-stream", }, { path: "/aaa", size: 2, contentType: "application/octet-stream", }, { path: "//bbb", size: 2, contentType: "application/octet-stream", }, { path: "ccc//", size: 0, contentType: "application/x-directory", }, { path: "newdir1/newdir2/newfile", size: 0, contentType: "application/octet-stream", }, { path: "newdir1/newdir2/newdir3/", size: 0, contentType: "application/x-directory", }, } { c.Logf("=== %v", trial) objname := prefix + trial.path _, err := bucket.GetReader(objname) if !c.Check(err, check.NotNil) { continue } c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) { continue } buf := make([]byte, trial.size) rand.Read(buf) err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{}) c.Check(err, check.IsNil) rdr, err := bucket.GetReader(objname) if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects { c.Check(err, check.NotNil) continue } else if !c.Check(err, check.IsNil) { continue } buf2, err := ioutil.ReadAll(rdr) c.Check(err, check.IsNil) c.Check(buf2, check.HasLen, len(buf)) c.Check(bytes.Equal(buf, buf2), check.Equals, true) } } func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) bucket := stage.projbucket for _, trial := range []struct { path string size int contentType string }{ { path: "newfile", size: 1234, contentType: "application/octet-stream", }, { path: "newdir/newfile", size: 1234, contentType: "application/octet-stream", }, { path: "newdir2/", size: 0, contentType: "application/x-directory", }, } { c.Logf("=== %v", trial) _, err := bucket.GetReader(trial.path) c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) c.Assert(err, check.ErrorMatches, `The specified key does not exist.`) buf := make([]byte, trial.size) rand.Read(buf) err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{}) c.Check(err.(*s3.Error).StatusCode, check.Equals, 400) c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`) c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid argument`) _, err = bucket.GetReader(trial.path) c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) c.Assert(err, check.ErrorMatches, `The specified key does not exist.`) } } func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3DeleteObject(c, stage.collbucket, "") } func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) { s.testServer.Config.cluster.Collections.S3FolderObjects = true for _, trial := range []struct { path string }{ {"/"}, {"nonexistentfile"}, {"emptyfile"}, {"sailboat.txt"}, {"sailboat.txt/"}, {"emptydir"}, {"emptydir/"}, } { objname := prefix + trial.path comment := check.Commentf("objname %q", objname) err := bucket.Del(objname) if trial.path == "/" { c.Check(err, check.NotNil) continue } c.Check(err, check.IsNil, comment) _, err = bucket.GetReader(objname) c.Check(err, check.NotNil, comment) } } func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3PutObjectFailure(c, stage.collbucket, "") } func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/") } func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) { s.testServer.Config.cluster.Collections.S3FolderObjects = false var wg sync.WaitGroup for _, trial := range []struct { path string }{ { path: "emptyfile/newname", // emptyfile exists, see s3setup() }, { path: "emptyfile/", // emptyfile exists, see s3setup() }, { path: "emptydir", // dir already exists, see s3setup() }, { path: "emptydir/", }, { path: "emptydir//", }, { path: "newdir/", }, { path: "newdir//", }, { path: "/", }, { path: "//", }, { path: "", }, } { trial := trial wg.Add(1) go func() { defer wg.Done() c.Logf("=== %v", trial) objname := prefix + trial.path buf := make([]byte, 1234) rand.Read(buf) err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{}) if !c.Check(err, check.ErrorMatches, `(invalid object name.*|open ".*" failed.*|object name conflicts with existing object|Missing object name in PUT request.)`, check.Commentf("PUT %q should fail", objname)) { return } if objname != "" && objname != "/" { _, err = bucket.GetReader(objname) c.Check(err.(*s3.Error).StatusCode, check.Equals, 404) c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`) c.Check(err, check.ErrorMatches, `The specified key does not exist.`, check.Commentf("GET %q should return 404", objname)) } }() } wg.Wait() } func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) { fs, err := stage.coll.FileSystem(stage.arv, stage.kc) c.Assert(err, check.IsNil) for d := 0; d < dirs; d++ { dir := fmt.Sprintf("dir%d", d) c.Assert(fs.Mkdir(dir, 0755), check.IsNil) for i := 0; i < filesPerDir; i++ { f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644) c.Assert(err, check.IsNil) c.Assert(f.Close(), check.IsNil) } } c.Assert(fs.Sync(), check.IsNil) } func (s *IntegrationSuite) sign(c *check.C, req *http.Request, key, secret string) { scope := "20200202/zzzzz/service/aws4_request" signedHeaders := "date" req.Header.Set("Date", time.Now().UTC().Format(time.RFC1123)) stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, req) c.Assert(err, check.IsNil) sig, err := s3signature(secret, scope, signedHeaders, stringToSign) c.Assert(err, check.IsNil) req.Header.Set("Authorization", s3SignAlgorithm+" Credential="+key+"/"+scope+", SignedHeaders="+signedHeaders+", Signature="+sig) } func (s *IntegrationSuite) TestS3VirtualHostStyleRequests(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, trial := range []struct { url string method string body string responseCode int responseRegexp []string }{ { url: "https://" + stage.collbucket.Name + ".example.com/", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, }, { url: "https://" + strings.Replace(stage.coll.PortableDataHash, "+", "-", -1) + ".example.com/", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, }, { url: "https://" + stage.projbucket.Name + ".example.com/?prefix=" + stage.coll.Name + "/&delimiter=/", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`(?ms).*sailboat\.txt.*`}, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/sailboat.txt", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`⛵\n`}, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep", method: "PUT", body: "boop", responseCode: http.StatusOK, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`boop`}, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", method: "GET", responseCode: http.StatusNotFound, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", method: "PUT", body: "boop", responseCode: http.StatusOK, }, { url: "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop", method: "GET", responseCode: http.StatusOK, responseRegexp: []string{`boop`}, }, } { url, err := url.Parse(trial.url) c.Assert(err, check.IsNil) req, err := http.NewRequest(trial.method, url.String(), bytes.NewReader([]byte(trial.body))) c.Assert(err, check.IsNil) s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken) rr := httptest.NewRecorder() s.testServer.Server.Handler.ServeHTTP(rr, req) resp := rr.Result() c.Check(resp.StatusCode, check.Equals, trial.responseCode) body, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) for _, re := range trial.responseRegexp { c.Check(string(body), check.Matches, re) } } } func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, trial := range []struct { rawPath string normalizedPath string }{ {"/foo", "/foo"}, // boring case {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped {"/foo%2fbar", "/foo/bar"}, // / must not be escaped {"/(foo)", "/%28foo%29"}, // () must be escaped {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase } { date := time.Now().UTC().Format("20060102T150405Z") scope := "20200202/zzzzz/S3/aws4_request" canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "") c.Logf("canonicalRequest %q", canonicalRequest) expect := fmt.Sprintf("%s\n%s\n%s\n%s", s3SignAlgorithm, date, scope, hashdigest(sha256.New(), canonicalRequest)) c.Logf("expected stringToSign %q", expect) req, err := http.NewRequest("GET", "https://host.example.com"+trial.rawPath, nil) req.Header.Set("X-Amz-Date", date) req.Host = "host.example.com" c.Assert(err, check.IsNil) obtained, err := s3stringToSign(s3SignAlgorithm, scope, "host", req) if !c.Check(err, check.IsNil) { continue } c.Check(obtained, check.Equals, expect) } } func (s *IntegrationSuite) TestS3GetBucketLocation(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { req, err := http.NewRequest("GET", bucket.URL("/"), nil) c.Check(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = "location" resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Equals, "\nzzzzz\n") } } func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { req, err := http.NewRequest("GET", bucket.URL("/"), nil) c.Check(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = "versioning" resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Equals, "\n\n") } } func (s *IntegrationSuite) TestS3UnsupportedAPIs(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, trial := range []struct { method string path string rawquery string }{ {"GET", "/", "acl&versionId=1234"}, // GetBucketAcl {"GET", "/foo", "acl&versionId=1234"}, // GetObjectAcl {"PUT", "/", "acl"}, // PutBucketAcl {"PUT", "/foo", "acl"}, // PutObjectAcl {"DELETE", "/", "tagging"}, // DeleteBucketTagging {"DELETE", "/foo", "tagging"}, // DeleteObjectTagging } { for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { c.Logf("trial %v bucket %v", trial, bucket) req, err := http.NewRequest(trial.method, bucket.URL(trial.path), nil) c.Check(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = trial.rawquery resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Matches, "(?ms).*InvalidRequest.*API not supported.*") } } } // If there are no CommonPrefixes entries, the CommonPrefixes XML tag // should not appear at all. func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil) c.Assert(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = "prefix=asdfasdfasdf&delimiter=/" resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`) } // If there is no delimiter in the request, or the results are not // truncated, the NextMarker XML tag should not appear in the response // body. func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) for _, query := range []string{"prefix=e&delimiter=/", ""} { req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil) c.Assert(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = query resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Not(check.Matches), `(?ms).*NextMarker.*`) } } // List response should include KeyCount field. func (s *IntegrationSuite) TestS3ListKeyCount(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil) c.Assert(err, check.IsNil) req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") req.URL.RawQuery = "prefix=&delimiter=/" resp, err := http.DefaultClient.Do(req) c.Assert(err, check.IsNil) buf, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) c.Check(string(buf), check.Matches, `(?ms).*2.*`) } func (s *IntegrationSuite) TestS3CollectionList(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) var markers int for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { dirs := 2 filesPerDir := 1001 stage.writeBigDirs(c, dirs, filesPerDir) // Total # objects is: // 2 file entries from s3setup (emptyfile and sailboat.txt) // +1 fake "directory" marker from s3setup (emptydir) (if enabled) // +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled) // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.) s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs) s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs) s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers) } } func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) { c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects) expectPageSize := pageSize if expectPageSize > 1000 { expectPageSize = 1000 } gotKeys := map[string]s3.Key{} nextMarker := "" pages := 0 for { resp, err := bucket.List(prefix, "", nextMarker, pageSize) if !c.Check(err, check.IsNil) { break } c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true) if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) { break } for _, key := range resp.Contents { gotKeys[key.Key] = key if strings.Contains(key.Key, "sailboat.txt") { c.Check(key.Size, check.Equals, int64(4)) } } if !resp.IsTruncated { c.Check(resp.NextMarker, check.Equals, "") break } if !c.Check(resp.NextMarker, check.Not(check.Equals), "") { break } nextMarker = resp.NextMarker } c.Check(len(gotKeys), check.Equals, expectFiles) } func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) { for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { s.testS3CollectionListRollup(c) } } func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) dirs := 2 filesPerDir := 500 stage.writeBigDirs(c, dirs, filesPerDir) err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{}) c.Assert(err, check.IsNil) var allfiles []string for marker := ""; ; { resp, err := stage.collbucket.List("", "", marker, 20000) c.Check(err, check.IsNil) for _, key := range resp.Contents { if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key { allfiles = append(allfiles, key.Key) } } marker = resp.NextMarker if marker == "" { break } } markers := 0 if s.testServer.Config.cluster.Collections.S3FolderObjects { markers = 1 } c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers) gotDirMarker := map[string]bool{} for _, name := range allfiles { isDirMarker := strings.HasSuffix(name, "/") if markers == 0 { c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name)) } else if isDirMarker { gotDirMarker[name] = true } else if i := strings.LastIndex(name, "/"); i >= 0 { c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name)) gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker } } for _, trial := range []struct { prefix string delimiter string marker string }{ {"", "", ""}, {"di", "/", ""}, {"di", "r", ""}, {"di", "n", ""}, {"dir0", "/", ""}, {"dir0/", "/", ""}, {"dir0/f", "/", ""}, {"dir0", "", ""}, {"dir0/", "", ""}, {"dir0/f", "", ""}, {"dir0", "/", "dir0/file14.txt"}, // no commonprefixes {"", "", "dir0/file14.txt"}, // middle page, skip walking dir1 {"", "", "dir1/file14.txt"}, // middle page, skip walking dir0 {"", "", "dir1/file498.txt"}, // last page of results {"dir1/file", "", "dir1/file498.txt"}, // last page of results, with prefix {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter {"dir1", "Z", "dir1/file498.txt"}, // delimiter "Z" never appears {"dir2", "/", ""}, // prefix "dir2" does not exist {"", "/", ""}, } { c.Logf("\n\n=== trial %+v markers=%d", trial, markers) maxKeys := 20 resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys) c.Check(err, check.IsNil) if resp.IsTruncated && trial.delimiter == "" { // goamz List method fills in the missing // NextMarker field if resp.IsTruncated, so // now we can't really tell whether it was // sent by the server or by goamz. In cases // where it should be empty but isn't, assume // it's goamz's fault. resp.NextMarker = "" } var expectKeys []string var expectPrefixes []string var expectNextMarker string var expectTruncated bool for _, key := range allfiles { full := len(expectKeys)+len(expectPrefixes) >= maxKeys if !strings.HasPrefix(key, trial.prefix) || key < trial.marker { continue } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 { prefix := key[:len(trial.prefix)+idx+1] if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix { // same prefix as previous key } else if full { expectNextMarker = key expectTruncated = true } else { expectPrefixes = append(expectPrefixes, prefix) } } else if full { if trial.delimiter != "" { expectNextMarker = key } expectTruncated = true break } else { expectKeys = append(expectKeys, key) } } var gotKeys []string for _, key := range resp.Contents { gotKeys = append(gotKeys, key.Key) } var gotPrefixes []string for _, prefix := range resp.CommonPrefixes { gotPrefixes = append(gotPrefixes, prefix) } commentf := check.Commentf("trial %+v markers=%d", trial, markers) c.Check(gotKeys, check.DeepEquals, expectKeys, commentf) c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf) c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf) c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf) c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker) } } // TestS3cmd checks compatibility with the s3cmd command line tool, if // it's installed. As of Debian buster, s3cmd is only in backports, so // `arvados-server install` don't install it, and this test skips if // it's not installed. func (s *IntegrationSuite) TestS3cmd(c *check.C) { if _, err := exec.LookPath("s3cmd"); err != nil { c.Skip("s3cmd not found") return } stage := s.s3setup(c) defer stage.teardown(c) cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection) buf, err := cmd.CombinedOutput() c.Check(err, check.IsNil) c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`) } func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt") c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`) c.Check(body, check.Equals, "⛵\n") }