//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package keepweb
import (
"bytes"
"crypto/sha256"
"fmt"
"io/ioutil"
+ "mime"
"net/http"
"net/http/httptest"
"net/url"
"os"
"os/exec"
+ "sort"
"strings"
"sync"
"time"
kc *keepclient.KeepClient
proj arvados.Group
projbucket *s3.Bucket
+ subproj arvados.Group
coll arvados.Collection
collbucket *s3.Bucket
}
func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
- var proj arvados.Group
+ var proj, subproj arvados.Group
var coll arvados.Collection
arv := arvados.NewClientFromEnv()
arv.AuthToken = arvadostest.ActiveToken
"group": map[string]interface{}{
"group_class": "project",
"name": "keep-web s3 test",
+ "properties": map[string]interface{}{
+ "project-properties-key": "project properties value",
+ },
},
"ensure_unique_name": true,
})
c.Assert(err, check.IsNil)
+ err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+ "group": map[string]interface{}{
+ "owner_uuid": proj.UUID,
+ "group_class": "project",
+ "name": "keep-web s3 test subproject",
+ "properties": map[string]interface{}{
+ "subproject_properties_key": "subproject properties value",
+ "invalid header key": "this value will not be returned because key contains spaces",
+ },
+ },
+ })
+ c.Assert(err, check.IsNil)
err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
"owner_uuid": proj.UUID,
"name": "keep-web s3 test collection",
"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+ "properties": map[string]interface{}{
+ "string": "string value",
+ "array": []string{"element1", "element2"},
+ "object": map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
+ "nonascii": "⛵",
+ "newline": "foo\r\nX-Bad: header",
+ // This key cannot be expressed as a MIME
+ // header key, so it will be silently skipped
+ // (see "Inject" in PropertiesAsMetadata test)
+ "a: a\r\nInject": "bogus",
+ },
}})
c.Assert(err, check.IsNil)
ac, err := arvadosclient.New(arv)
auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour))
region := aws.Region{
Name: "zzzzz",
- S3Endpoint: "http://" + s.testServer.Addr,
+ S3Endpoint: s.testServer.URL,
}
client := s3.New(*auth, region)
client.Signature = aws.V4Signature
S3: client,
Name: proj.UUID,
},
- coll: coll,
+ subproj: subproj,
+ coll: coll,
collbucket: &s3.Bucket{
S3: client,
Name: coll.UUID,
c.Check(exists, check.Equals, true)
}
+// checkMetaEquals collects the X-Amz-Meta-* response headers from hdr
+// (keyed by the remainder after stripping the "X-Amz-Meta-" prefix) and
+// asserts that the resulting map deep-equals expect. Headers without the
+// prefix, and metadata headers with more than one value, are ignored.
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
+ got := map[string]string{}
+ for hk, hv := range hdr {
+ // TrimPrefix returns hk unchanged when the prefix is absent, so
+ // k != hk is true only for X-Amz-Meta-* headers.
+ if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
+ got[k] = hv[0]
+ }
+ }
+ c.Check(got, check.DeepEquals, expect)
+}
+
+// TestS3PropertiesAsMetadata verifies that Arvados properties set on a
+// collection, a subproject, and a project (see s3setup) are surfaced as
+// X-Amz-Meta-* headers on S3 HEAD/GET responses, at every level where
+// the object is reachable: directly in the collection bucket, and via
+// the project bucket as a file, a directory placeholder, or the bucket
+// itself. Non-string property values are JSON-encoded; values that are
+// not valid in a MIME header (non-ASCII, embedded CR/LF) are RFC 2047
+// B-encoded; property keys that cannot be expressed as MIME header keys
+// are silently dropped (the "Inject" check below).
+func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+
+ expectCollectionTags := map[string]string{
+ "String": "string value",
+ "Array": `["element1","element2"]`,
+ "Object": mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
+ "Nonascii": "=?UTF-8?b?4pu1?=",
+ "Newline": mime.BEncoding.Encode("UTF-8", "foo\r\nX-Bad: header"),
+ }
+ expectSubprojectTags := map[string]string{
+ "Subproject_properties_key": "subproject properties value",
+ }
+ expectProjectTags := map[string]string{
+ "Project-Properties-Key": "project properties value",
+ }
+
+ // "sailboat.txt" is presumably created during s3setup — TODO confirm;
+ // its 4-byte size is asserted below.
+ c.Log("HEAD object with metadata from collection")
+ resp, err := stage.collbucket.Head("sailboat.txt", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+ c.Log("GET object with metadata from collection")
+ rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+ c.Assert(err, check.IsNil)
+ content, err := ioutil.ReadAll(rdr)
+ c.Check(err, check.IsNil)
+ rdr.Close()
+ c.Check(content, check.HasLen, 4)
+ s.checkMetaEquals(c, hdr, expectCollectionTags)
+ // The "a: a\r\nInject" property key set in s3setup must not leak
+ // through as a header of its own.
+ c.Check(hdr["Inject"], check.IsNil)
+
+ c.Log("HEAD bucket with metadata from collection")
+ resp, err = stage.collbucket.Head("/", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+ c.Log("HEAD directory placeholder with metadata from collection")
+ resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+ c.Log("HEAD file with metadata from collection")
+ resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+ c.Log("HEAD directory placeholder with metadata from subproject")
+ resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+ c.Log("HEAD bucket with metadata from project")
+ resp, err = stage.projbucket.Head("/", nil)
+ c.Assert(err, check.IsNil)
+ s.checkMetaEquals(c, resp.Header, expectProjectTags)
+}
+
func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
- s.testS3PutObjectSuccess(c, stage.collbucket, "")
+ // Pass the collection UUID so the helper can re-check each PUT via a
+ // direct (non-S3) webdav request against the same collection.
+ s.testS3PutObjectSuccess(c, stage.collbucket, "", stage.coll.UUID)
}
func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
- s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/")
+ // Same as the collection-bucket variant, but objects are addressed
+ // through the project bucket under the collection-name prefix.
+ s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/", stage.coll.UUID)
}
-func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) {
+func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string, collUUID string) {
+ // We insert a delay between test cases to ensure we exercise
+ // rollover of expired sessions.
+ sleep := time.Second / 100
+ s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(sleep * 3)
+
for _, trial := range []struct {
path string
size int
contentType: "application/x-directory",
},
} {
+ time.Sleep(sleep)
c.Logf("=== %v", trial)
objname := prefix + trial.path
if !c.Check(err, check.NotNil) {
continue
}
- c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
+ c.Check(err.(*s3.Error).StatusCode, check.Equals, http.StatusNotFound)
c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) {
continue
c.Check(err, check.IsNil)
rdr, err := bucket.GetReader(objname)
- if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects {
+ if strings.HasSuffix(trial.path, "/") && !s.handler.Cluster.Collections.S3FolderObjects {
c.Check(err, check.NotNil)
continue
} else if !c.Check(err, check.IsNil) {
c.Check(err, check.IsNil)
c.Check(buf2, check.HasLen, len(buf))
c.Check(bytes.Equal(buf, buf2), check.Equals, true)
+
+ // Check that the change is immediately visible via
+ // (non-S3) webdav request.
+ _, resp := s.do("GET", "http://"+collUUID+".keep-web.example/"+trial.path, arvadostest.ActiveTokenV2, nil)
+ c.Check(resp.Code, check.Equals, http.StatusOK)
+ if !strings.HasSuffix(trial.path, "/") {
+ c.Check(resp.Body.Len(), check.Equals, trial.size)
+ }
}
}
err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
c.Check(err.(*s3.Error).StatusCode, check.Equals, 400)
c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`)
- c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid argument`)
+ c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid (argument|operation)`)
_, err = bucket.GetReader(trial.path)
c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
}
func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
- s.testServer.Config.cluster.Collections.S3FolderObjects = true
+ s.handler.Cluster.Collections.S3FolderObjects = true
for _, trial := range []struct {
path string
}{
s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
}
func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
- s.testServer.Config.cluster.Collections.S3FolderObjects = false
+ s.handler.Cluster.Collections.S3FolderObjects = false
var wg sync.WaitGroup
for _, trial := range []struct {
c.Assert(err, check.IsNil)
s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken)
rr := httptest.NewRecorder()
- s.testServer.Server.Handler.ServeHTTP(rr, req)
+ s.handler.ServeHTTP(rr, req)
resp := rr.Result()
c.Check(resp.StatusCode, check.Equals, trial.responseCode)
body, err := ioutil.ReadAll(resp.Body)
{"/foo%2fbar", "/foo/bar"}, // / must not be escaped
{"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
{"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase
- {"//foo///bar", "/foo/bar"}, // "//" and "///" must be squashed to "/"
+ {"//foo///.bar", "/foo/.bar"}, // "//" and "///" must be squashed to "/"
} {
c.Logf("trial %q", trial)
defer stage.teardown(c)
var markers int
- for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
- dirs := 2
- filesPerDir := 1001
+ for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
+ dirs := 2000
+ filesPerDir := 2
stage.writeBigDirs(c, dirs, filesPerDir)
// Total # objects is:
// 2 file entries from s3setup (emptyfile and sailboat.txt)
// +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
+ s.testS3List(c, stage.collbucket, "", 51, markers+2+(filesPerDir+markers)*dirs)
s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
}
}
func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
- c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects)
+ c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.handler.Cluster.Collections.S3FolderObjects)
expectPageSize := pageSize
if expectPageSize > 1000 {
expectPageSize = 1000
break
}
for _, key := range resp.Contents {
+ if _, dup := gotKeys[key.Key]; dup {
+ c.Errorf("got duplicate key %q on page %d", key.Key, pages)
+ }
gotKeys[key.Key] = key
if strings.Contains(key.Key, "sailboat.txt") {
c.Check(key.Size, check.Equals, int64(4))
}
nextMarker = resp.NextMarker
}
- c.Check(len(gotKeys), check.Equals, expectFiles)
+ if !c.Check(len(gotKeys), check.Equals, expectFiles) {
+ var sorted []string
+ for k := range gotKeys {
+ sorted = append(sorted, k)
+ }
+ sort.Strings(sorted)
+ for _, k := range sorted {
+ c.Logf("got %s", k)
+ }
+ }
}
func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
- for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
+ for _, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
s.testS3CollectionListRollup(c)
}
}
}
}
markers := 0
- if s.testServer.Config.cluster.Collections.S3FolderObjects {
+ if s.handler.Cluster.Collections.S3FolderObjects {
markers = 1
}
c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
{"dir0", "", ""},
{"dir0/", "", ""},
{"dir0/f", "", ""},
- {"dir0", "/", "dir0/file14.txt"}, // no commonprefixes
+ {"dir0", "/", "dir0/file14.txt"}, // one commonprefix, "dir0/"
+ {"dir0", "/", "dir0/zzzzfile.txt"}, // no commonprefixes
{"", "", "dir0/file14.txt"}, // middle page, skip walking dir1
{"", "", "dir1/file14.txt"}, // middle page, skip walking dir0
{"", "", "dir1/file498.txt"}, // last page of results
var expectTruncated bool
for _, key := range allfiles {
full := len(expectKeys)+len(expectPrefixes) >= maxKeys
- if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
+ if !strings.HasPrefix(key, trial.prefix) || key <= trial.marker {
continue
} else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
prefix := key[:len(trial.prefix)+idx+1]
if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
// same prefix as previous key
} else if full {
- expectNextMarker = key
expectTruncated = true
} else {
expectPrefixes = append(expectPrefixes, prefix)
+ expectNextMarker = prefix
}
} else if full {
- if trial.delimiter != "" {
- expectNextMarker = key
- }
expectTruncated = true
break
} else {
expectKeys = append(expectKeys, key)
+ if trial.delimiter != "" {
+ expectNextMarker = key
+ }
}
}
+ if !expectTruncated {
+ expectNextMarker = ""
+ }
var gotKeys []string
for _, key := range resp.Contents {
}
}
+// TestS3ListObjectsV2ManySubprojects builds a wide project tree (50
+// subprojects under stage.subproj, each holding 2 collections) and pages
+// through a delimited ListObjectsV2 request on the project bucket,
+// shrinking MaxKeys on every page to exercise varied page boundaries.
+// Each page must succeed; per-page timing is logged.
+// NOTE(review): this function needs the "context" package (and the
+// aws_* SDK aliases) in the file's import block — the visible import
+// hunk only adds "mime" and "sort"; confirm "context" is imported.
+func (s *IntegrationSuite) TestS3ListObjectsV2ManySubprojects(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+ projects := 50
+ collectionsPerProject := 2
+ for i := 0; i < projects; i++ {
+ var subproj arvados.Group
+ err := stage.arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+ "group": map[string]interface{}{
+ "owner_uuid": stage.subproj.UUID,
+ "group_class": "project",
+ "name": fmt.Sprintf("keep-web s3 test subproject %d", i),
+ },
+ })
+ c.Assert(err, check.IsNil)
+ for j := 0; j < collectionsPerProject; j++ {
+ err = stage.arv.RequestAndDecode(nil, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
+ "owner_uuid": subproj.UUID,
+ "name": fmt.Sprintf("keep-web s3 test collection %d", j),
+ "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+ }})
+ c.Assert(err, check.IsNil)
+ }
+ }
+ c.Logf("setup complete")
+
+ sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
+ Region: aws_aws.String("auto"),
+ Endpoint: aws_aws.String(s.testServer.URL),
+ Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
+ S3ForcePathStyle: aws_aws.Bool(true),
+ }))
+ client := aws_s3.New(sess)
+ ctx := context.Background()
+ params := aws_s3.ListObjectsV2Input{
+ Bucket: aws_aws.String(stage.proj.UUID),
+ Delimiter: aws_aws.String("/"),
+ Prefix: aws_aws.String("keep-web s3 test subproject/"),
+ MaxKeys: aws_aws.Int64(int64(projects / 2)),
+ }
+ // Page until the server reports no truncation, following the
+ // continuation token and halving (plus one) MaxKeys each round so
+ // successive pages land on different boundaries.
+ for page := 1; ; page++ {
+ t0 := time.Now()
+ result, err := client.ListObjectsV2WithContext(ctx, &params)
+ if !c.Check(err, check.IsNil) {
+ break
+ }
+ c.Logf("got page %d in %v with len(Contents) == %d, len(CommonPrefixes) == %d", page, time.Since(t0), len(result.Contents), len(result.CommonPrefixes))
+ if !*result.IsTruncated {
+ break
+ }
+ params.ContinuationToken = result.NextContinuationToken
+ *params.MaxKeys = *params.MaxKeys/2 + 1
+ }
+}
+
func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
Region: aws_aws.String("auto"),
- Endpoint: aws_aws.String("http://" + s.testServer.Addr),
+ Endpoint: aws_aws.String(s.testServer.URL),
Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
S3ForcePathStyle: aws_aws.Bool(true),
}))
sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
Region: aws_aws.String("auto"),
- Endpoint: aws_aws.String("http://" + s.testServer.Addr),
+ Endpoint: aws_aws.String(s.testServer.URL),
Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
S3ForcePathStyle: aws_aws.Bool(true),
}))
stage := s.s3setup(c)
defer stage.teardown(c)
- cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection)
+ cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection)
buf, err := cmd.CombinedOutput()
c.Check(err, check.IsNil)
c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
// keep-web's signature verification wrt chars like "|"
// (neither reserved nor unreserved) and "," (not normally
// percent-encoded in a path).
- cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar")
+ tmpfile := c.MkDir() + "/dstfile"
+ cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar", tmpfile)
buf, err = cmd.CombinedOutput()
c.Check(err, check.NotNil)
- c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`)
+ // As of commit b7520e5c25e1bf25c1a8bf5aa2eadb299be8f606
+ // (between debian bullseye and bookworm versions), s3cmd
+ // started catching the NoSuchKey error code and replacing it
+ // with "Source object '%s' does not exist.".
+ c.Check(string(buf), check.Matches, `(?ms).*(NoSuchKey|Source object.*does not exist).*\n`)
}
func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {