Merge branch '21998-zoe-translates-keep-web-directory-listing-encode-url'
[arvados.git] / services / keep-web / s3_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "crypto/rand"
11         "crypto/sha256"
12         "fmt"
13         "io/ioutil"
14         "mime"
15         "net/http"
16         "net/http/httptest"
17         "net/url"
18         "os"
19         "os/exec"
20         "sort"
21         "strings"
22         "sync"
23         "time"
24
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/arvadostest"
28         "git.arvados.org/arvados.git/sdk/go/keepclient"
29         "github.com/AdRoll/goamz/aws"
30         "github.com/AdRoll/goamz/s3"
31         aws_aws "github.com/aws/aws-sdk-go/aws"
32         aws_credentials "github.com/aws/aws-sdk-go/aws/credentials"
33         aws_session "github.com/aws/aws-sdk-go/aws/session"
34         aws_s3 "github.com/aws/aws-sdk-go/service/s3"
35         check "gopkg.in/check.v1"
36 )
37
// CachedS3SecretSuite groups the unit tests in this file that
// exercise newCachedS3Secret and cachedS3Secret.isValidAt.
type CachedS3SecretSuite struct{}

// Register the suite with the gocheck test runner.
var _ = check.Suite(&CachedS3SecretSuite{})
41
42 func (s *CachedS3SecretSuite) activeACA(expiresAt time.Time) *arvados.APIClientAuthorization {
43         return &arvados.APIClientAuthorization{
44                 UUID:      arvadostest.ActiveTokenUUID,
45                 APIToken:  arvadostest.ActiveToken,
46                 ExpiresAt: expiresAt,
47         }
48 }
49
50 func (s *CachedS3SecretSuite) TestNewCachedS3SecretExpiresBeforeTTL(c *check.C) {
51         expected := time.Unix(1<<29, 0)
52         aca := s.activeACA(expected)
53         actual := newCachedS3Secret(aca, time.Unix(1<<30, 0))
54         c.Check(actual.expiry, check.Equals, expected)
55 }
56
57 func (s *CachedS3SecretSuite) TestNewCachedS3SecretExpiresAfterTTL(c *check.C) {
58         expected := time.Unix(1<<29, 0)
59         aca := s.activeACA(time.Unix(1<<30, 0))
60         actual := newCachedS3Secret(aca, expected)
61         c.Check(actual.expiry, check.Equals, expected)
62 }
63
64 func (s *CachedS3SecretSuite) TestNewCachedS3SecretWithoutExpiry(c *check.C) {
65         expected := time.Unix(1<<29, 0)
66         aca := s.activeACA(time.Time{})
67         actual := newCachedS3Secret(aca, expected)
68         c.Check(actual.expiry, check.Equals, expected)
69 }
70
71 func (s *CachedS3SecretSuite) cachedSecretWithExpiry(expiry time.Time) *cachedS3Secret {
72         return &cachedS3Secret{
73                 auth:   s.activeACA(expiry),
74                 expiry: expiry,
75         }
76 }
77
78 func (s *CachedS3SecretSuite) TestIsValidAtEmpty(c *check.C) {
79         cache := &cachedS3Secret{}
80         c.Check(cache.isValidAt(time.Unix(0, 0)), check.Equals, false)
81         c.Check(cache.isValidAt(time.Unix(1<<31, 0)), check.Equals, false)
82 }
83
84 func (s *CachedS3SecretSuite) TestIsValidAtNoAuth(c *check.C) {
85         cache := &cachedS3Secret{expiry: time.Unix(3, 0)}
86         c.Check(cache.isValidAt(time.Unix(2, 0)), check.Equals, false)
87         c.Check(cache.isValidAt(time.Unix(4, 0)), check.Equals, false)
88 }
89
90 func (s *CachedS3SecretSuite) TestIsValidAtNoExpiry(c *check.C) {
91         cache := &cachedS3Secret{auth: s.activeACA(time.Unix(3, 0))}
92         c.Check(cache.isValidAt(time.Unix(2, 0)), check.Equals, false)
93         c.Check(cache.isValidAt(time.Unix(4, 0)), check.Equals, false)
94 }
95
96 func (s *CachedS3SecretSuite) TestIsValidAtTimeAfterExpiry(c *check.C) {
97         expiry := time.Unix(10, 0)
98         cache := s.cachedSecretWithExpiry(expiry)
99         c.Check(cache.isValidAt(expiry), check.Equals, false)
100         c.Check(cache.isValidAt(time.Unix(1<<25, 0)), check.Equals, false)
101         c.Check(cache.isValidAt(time.Unix(1<<30, 0)), check.Equals, false)
102 }
103
104 func (s *CachedS3SecretSuite) TestIsValidAtTimeBeforeExpiry(c *check.C) {
105         cache := s.cachedSecretWithExpiry(time.Unix(1<<30, 0))
106         c.Check(cache.isValidAt(time.Unix(1<<25, 0)), check.Equals, true)
107         c.Check(cache.isValidAt(time.Unix(1<<27, 0)), check.Equals, true)
108         c.Check(cache.isValidAt(time.Unix(1<<29, 0)), check.Equals, true)
109 }
110
111 func (s *CachedS3SecretSuite) TestIsValidAtZeroTime(c *check.C) {
112         cache := s.cachedSecretWithExpiry(time.Unix(10, 0))
113         c.Check(cache.isValidAt(time.Time{}), check.Equals, false)
114 }
115
// s3stage bundles the API clients and test fixtures that s3setup
// creates for one S3 test. Its teardown method deletes the
// collection and the project.
type s3stage struct {
	// arv is the API client used to create and delete the fixtures.
	arv        *arvados.Client
	// ac is the arvadosclient built from arv.
	ac         *arvadosclient.ArvadosClient
	// kc is the keep client built from ac.
	kc         *keepclient.KeepClient
	// proj is the test project created by s3setup.
	proj       arvados.Group
	// projbucket is the S3 bucket whose name is proj.UUID.
	projbucket *s3.Bucket
	// subproj is the subproject created inside proj.
	subproj    arvados.Group
	// coll is the test collection created inside proj.
	coll       arvados.Collection
	// collbucket is the S3 bucket whose name is coll.UUID.
	collbucket *s3.Bucket
}
126
127 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
128         var proj, subproj arvados.Group
129         var coll arvados.Collection
130         arv := arvados.NewClientFromEnv()
131         arv.AuthToken = arvadostest.ActiveToken
132         err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
133                 "group": map[string]interface{}{
134                         "group_class": "project",
135                         "name":        "keep-web s3 test",
136                         "properties": map[string]interface{}{
137                                 "project-properties-key": "project properties value",
138                         },
139                 },
140                 "ensure_unique_name": true,
141         })
142         c.Assert(err, check.IsNil)
143         err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
144                 "group": map[string]interface{}{
145                         "owner_uuid":  proj.UUID,
146                         "group_class": "project",
147                         "name":        "keep-web s3 test subproject",
148                         "properties": map[string]interface{}{
149                                 "subproject_properties_key": "subproject properties value",
150                                 "invalid header key":        "this value will not be returned because key contains spaces",
151                         },
152                 },
153         })
154         c.Assert(err, check.IsNil)
155         err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
156                 "owner_uuid":    proj.UUID,
157                 "name":          "keep-web s3 test collection",
158                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
159                 "properties": map[string]interface{}{
160                         "string":   "string value",
161                         "array":    []string{"element1", "element2"},
162                         "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
163                         "nonascii": "⛵",
164                         "newline":  "foo\r\nX-Bad: header",
165                         // This key cannot be expressed as a MIME
166                         // header key, so it will be silently skipped
167                         // (see "Inject" in PropertiesAsMetadata test)
168                         "a: a\r\nInject": "bogus",
169                 },
170         }})
171         c.Assert(err, check.IsNil)
172         ac, err := arvadosclient.New(arv)
173         c.Assert(err, check.IsNil)
174         kc, err := keepclient.MakeKeepClient(ac)
175         c.Assert(err, check.IsNil)
176         fs, err := coll.FileSystem(arv, kc)
177         c.Assert(err, check.IsNil)
178         f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
179         c.Assert(err, check.IsNil)
180         _, err = f.Write([]byte("⛵\n"))
181         c.Assert(err, check.IsNil)
182         err = f.Close()
183         c.Assert(err, check.IsNil)
184         err = fs.Sync()
185         c.Assert(err, check.IsNil)
186         err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
187         c.Assert(err, check.IsNil)
188
189         auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour))
190         region := aws.Region{
191                 Name:       "zzzzz",
192                 S3Endpoint: s.testServer.URL,
193         }
194         client := s3.New(*auth, region)
195         client.Signature = aws.V4Signature
196         return s3stage{
197                 arv:  arv,
198                 ac:   ac,
199                 kc:   kc,
200                 proj: proj,
201                 projbucket: &s3.Bucket{
202                         S3:   client,
203                         Name: proj.UUID,
204                 },
205                 subproj: subproj,
206                 coll:    coll,
207                 collbucket: &s3.Bucket{
208                         S3:   client,
209                         Name: coll.UUID,
210                 },
211         }
212 }
213
214 func (stage s3stage) teardown(c *check.C) {
215         if stage.coll.UUID != "" {
216                 err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
217                 c.Check(err, check.IsNil)
218         }
219         if stage.proj.UUID != "" {
220                 err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
221                 c.Check(err, check.IsNil)
222         }
223 }
224
225 func (s *IntegrationSuite) TestS3Signatures(c *check.C) {
226         stage := s.s3setup(c)
227         defer stage.teardown(c)
228
229         bucket := stage.collbucket
230         for _, trial := range []struct {
231                 success   bool
232                 signature int
233                 accesskey string
234                 secretkey string
235         }{
236                 {true, aws.V2Signature, arvadostest.ActiveToken, "none"},
237                 {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"},
238                 {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"},
239                 {false, aws.V2Signature, "none", "none"},
240                 {false, aws.V2Signature, "none", arvadostest.ActiveToken},
241
242                 {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken},
243                 {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken},
244                 {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2)},
245                 {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)},
246                 {false, aws.V4Signature, arvadostest.ActiveToken, ""},
247                 {false, aws.V4Signature, arvadostest.ActiveToken, "none"},
248                 {false, aws.V4Signature, "none", arvadostest.ActiveToken},
249                 {false, aws.V4Signature, "none", "none"},
250         } {
251                 c.Logf("%#v", trial)
252                 bucket.S3.Auth = *(aws.NewAuth(trial.accesskey, trial.secretkey, "", time.Now().Add(time.Hour)))
253                 bucket.S3.Signature = trial.signature
254                 _, err := bucket.GetReader("emptyfile")
255                 if trial.success {
256                         c.Check(err, check.IsNil)
257                 } else {
258                         c.Check(err, check.NotNil)
259                 }
260         }
261 }
262
263 func (s *IntegrationSuite) TestS3SecretCacheUpdates(c *check.C) {
264         stage := s.s3setup(c)
265         defer stage.teardown(c)
266         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
267         c.Assert(err, check.IsNil)
268
269         for trialName, trialAuth := range map[string]string{
270                 "v1 token":                    arvadostest.ActiveToken,
271                 "token UUID":                  arvadostest.ActiveTokenUUID,
272                 "v2 token query escaped":      url.QueryEscape(arvadostest.ActiveTokenV2),
273                 "v2 token underscore escaped": strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1),
274         } {
275                 s.handler.s3SecretCache = nil
276                 req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
277                 if !c.Check(err, check.IsNil) {
278                         continue
279                 }
280                 secret := trialAuth
281                 if secret[5:12] == "-gj3su-" {
282                         secret = arvadostest.ActiveToken
283                 }
284                 s.sign(c, req, trialAuth, secret)
285                 rec := httptest.NewRecorder()
286                 s.handler.ServeHTTP(rec, req)
287                 if !c.Check(rec.Result().StatusCode, check.Equals, http.StatusOK,
288                         check.Commentf("%s auth did not get 200 OK response: %v", trialName, req)) {
289                         continue
290                 }
291
292                 for name, key := range map[string]string{
293                         "v1 token":   arvadostest.ActiveToken,
294                         "token UUID": arvadostest.ActiveTokenUUID,
295                         "v2 token":   arvadostest.ActiveTokenV2,
296                 } {
297                         actual, ok := s.handler.s3SecretCache[key]
298                         if c.Check(ok, check.Equals, true, check.Commentf("%s not cached from %s", name, trialName)) {
299                                 c.Check(actual.auth.UUID, check.Equals, arvadostest.ActiveTokenUUID)
300                         }
301                 }
302         }
303 }
304
305 func (s *IntegrationSuite) TestS3SecretCacheUsed(c *check.C) {
306         stage := s.s3setup(c)
307         defer stage.teardown(c)
308
309         token := arvadostest.ActiveToken
310         // Step 1: Make a request to get the active token in the cache.
311         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
312         c.Assert(err, check.IsNil)
313         req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
314         s.sign(c, req, token, token)
315         rec := httptest.NewRecorder()
316         s.handler.ServeHTTP(rec, req)
317         resp := rec.Result()
318         c.Assert(resp.StatusCode, check.Equals, http.StatusOK,
319                 check.Commentf("first request did not get 200 OK response"))
320
321         // Step 2: Remove some cache keys our request doesn't rely upon.
322         c.Assert(s.handler.s3SecretCache[arvadostest.ActiveTokenUUID], check.NotNil)
323         delete(s.handler.s3SecretCache, arvadostest.ActiveTokenUUID)
324         c.Assert(s.handler.s3SecretCache[arvadostest.ActiveTokenV2], check.NotNil)
325         delete(s.handler.s3SecretCache, arvadostest.ActiveTokenV2)
326
327         // Step 3: Repeat the original request.
328         rec = httptest.NewRecorder()
329         s.handler.ServeHTTP(rec, req)
330         resp = rec.Result()
331         c.Assert(resp.StatusCode, check.Equals, http.StatusOK,
332                 check.Commentf("cached auth request did not get 200 OK response"))
333
334         // Step 4: Confirm the deleted cache keys were not re-added
335         // (which would imply the authorization was re-requested and cached).
336         c.Check(s.handler.s3SecretCache[arvadostest.ActiveTokenUUID], check.IsNil,
337                 check.Commentf("token UUID re-added to cache after removal"))
338         c.Check(s.handler.s3SecretCache[arvadostest.ActiveTokenV2], check.IsNil,
339                 check.Commentf("v2 token re-added to cache after removal"))
340 }
341
342 func (s *IntegrationSuite) TestS3SecretCacheCleanup(c *check.C) {
343         stage := s.s3setup(c)
344         defer stage.teardown(c)
345         td := -2 * s3SecretCacheTidyInterval
346         startTidied := time.Now().Add(td)
347         s.handler.s3SecretCacheNextTidy = startTidied
348         s.handler.s3SecretCache = make(map[string]*cachedS3Secret)
349         s.handler.s3SecretCache["old"] = &cachedS3Secret{expiry: startTidied.Add(td)}
350
351         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
352         c.Assert(err, check.IsNil)
353         req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
354         token := arvadostest.ActiveToken
355         s.sign(c, req, token, token)
356         rec := httptest.NewRecorder()
357         s.handler.ServeHTTP(rec, req)
358
359         c.Check(s.handler.s3SecretCache["old"], check.IsNil,
360                 check.Commentf("expired token not removed from cache"))
361         c.Check(s.handler.s3SecretCacheNextTidy.After(startTidied), check.Equals, true,
362                 check.Commentf("s3SecretCacheNextTidy not updated"))
363         c.Check(s.handler.s3SecretCache[token], check.NotNil,
364                 check.Commentf("just-used token not found in cache"))
365 }
366
367 func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
368         stage := s.s3setup(c)
369         defer stage.teardown(c)
370
371         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
372                 c.Logf("bucket %s", bucket.Name)
373                 exists, err := bucket.Exists("")
374                 c.Check(err, check.IsNil)
375                 c.Check(exists, check.Equals, true)
376         }
377 }
378
379 func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
380         stage := s.s3setup(c)
381         defer stage.teardown(c)
382         s.testS3GetObject(c, stage.collbucket, "")
383 }
384 func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
385         stage := s.s3setup(c)
386         defer stage.teardown(c)
387         s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
388 }
389 func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
390         rdr, err := bucket.GetReader(prefix + "emptyfile")
391         c.Assert(err, check.IsNil)
392         buf, err := ioutil.ReadAll(rdr)
393         c.Check(err, check.IsNil)
394         c.Check(len(buf), check.Equals, 0)
395         err = rdr.Close()
396         c.Check(err, check.IsNil)
397
398         // GetObject
399         rdr, err = bucket.GetReader(prefix + "missingfile")
400         c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
401         c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
402         c.Check(err, check.ErrorMatches, `The specified key does not exist.`)
403
404         // HeadObject
405         exists, err := bucket.Exists(prefix + "missingfile")
406         c.Check(err, check.IsNil)
407         c.Check(exists, check.Equals, false)
408
409         // GetObject
410         rdr, err = bucket.GetReader(prefix + "sailboat.txt")
411         c.Assert(err, check.IsNil)
412         buf, err = ioutil.ReadAll(rdr)
413         c.Check(err, check.IsNil)
414         c.Check(buf, check.DeepEquals, []byte("⛵\n"))
415         err = rdr.Close()
416         c.Check(err, check.IsNil)
417
418         // HeadObject
419         resp, err := bucket.Head(prefix+"sailboat.txt", nil)
420         c.Check(err, check.IsNil)
421         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
422         c.Check(resp.ContentLength, check.Equals, int64(4))
423
424         // HeadObject with superfluous leading slashes
425         exists, err = bucket.Exists(prefix + "//sailboat.txt")
426         c.Check(err, check.IsNil)
427         c.Check(exists, check.Equals, true)
428 }
429
430 func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
431         got := map[string]string{}
432         for hk, hv := range hdr {
433                 if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
434                         got[k] = hv[0]
435                 }
436         }
437         c.Check(got, check.DeepEquals, expect)
438 }
439
440 func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
441         stage := s.s3setup(c)
442         defer stage.teardown(c)
443
444         expectCollectionTags := map[string]string{
445                 "String":   "string value",
446                 "Array":    `["element1","element2"]`,
447                 "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
448                 "Nonascii": "=?UTF-8?b?4pu1?=",
449                 "Newline":  mime.BEncoding.Encode("UTF-8", "foo\r\nX-Bad: header"),
450         }
451         expectSubprojectTags := map[string]string{
452                 "Subproject_properties_key": "subproject properties value",
453         }
454         expectProjectTags := map[string]string{
455                 "Project-Properties-Key": "project properties value",
456         }
457
458         c.Log("HEAD object with metadata from collection")
459         resp, err := stage.collbucket.Head("sailboat.txt", nil)
460         c.Assert(err, check.IsNil)
461         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
462
463         c.Log("GET object with metadata from collection")
464         rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
465         c.Assert(err, check.IsNil)
466         content, err := ioutil.ReadAll(rdr)
467         c.Check(err, check.IsNil)
468         rdr.Close()
469         c.Check(content, check.HasLen, 4)
470         s.checkMetaEquals(c, hdr, expectCollectionTags)
471         c.Check(hdr["Inject"], check.IsNil)
472
473         c.Log("HEAD bucket with metadata from collection")
474         resp, err = stage.collbucket.Head("/", nil)
475         c.Assert(err, check.IsNil)
476         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
477
478         c.Log("HEAD directory placeholder with metadata from collection")
479         resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
480         c.Assert(err, check.IsNil)
481         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
482
483         c.Log("HEAD file with metadata from collection")
484         resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
485         c.Assert(err, check.IsNil)
486         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
487
488         c.Log("HEAD directory placeholder with metadata from subproject")
489         resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
490         c.Assert(err, check.IsNil)
491         s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
492
493         c.Log("HEAD bucket with metadata from project")
494         resp, err = stage.projbucket.Head("/", nil)
495         c.Assert(err, check.IsNil)
496         s.checkMetaEquals(c, resp.Header, expectProjectTags)
497 }
498
499 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
500         stage := s.s3setup(c)
501         defer stage.teardown(c)
502         s.testS3PutObjectSuccess(c, stage.collbucket, "", stage.coll.UUID)
503 }
504 func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
505         stage := s.s3setup(c)
506         defer stage.teardown(c)
507         s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/", stage.coll.UUID)
508 }
509 func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string, collUUID string) {
510         // We insert a delay between test cases to ensure we exercise
511         // rollover of expired sessions.
512         sleep := time.Second / 100
513         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(sleep * 3)
514
515         for _, trial := range []struct {
516                 path        string
517                 size        int
518                 contentType string
519         }{
520                 {
521                         path:        "newfile",
522                         size:        128000000,
523                         contentType: "application/octet-stream",
524                 }, {
525                         path:        "newdir/newfile",
526                         size:        1 << 26,
527                         contentType: "application/octet-stream",
528                 }, {
529                         path:        "/aaa",
530                         size:        2,
531                         contentType: "application/octet-stream",
532                 }, {
533                         path:        "//bbb",
534                         size:        2,
535                         contentType: "application/octet-stream",
536                 }, {
537                         path:        "ccc//",
538                         size:        0,
539                         contentType: "application/x-directory",
540                 }, {
541                         path:        "newdir1/newdir2/newfile",
542                         size:        0,
543                         contentType: "application/octet-stream",
544                 }, {
545                         path:        "newdir1/newdir2/newdir3/",
546                         size:        0,
547                         contentType: "application/x-directory",
548                 },
549         } {
550                 time.Sleep(sleep)
551                 c.Logf("=== %v", trial)
552
553                 objname := prefix + trial.path
554
555                 _, err := bucket.GetReader(objname)
556                 if !c.Check(err, check.NotNil) {
557                         continue
558                 }
559                 c.Check(err.(*s3.Error).StatusCode, check.Equals, http.StatusNotFound)
560                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
561                 if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) {
562                         continue
563                 }
564
565                 buf := make([]byte, trial.size)
566                 rand.Read(buf)
567
568                 err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
569                 c.Check(err, check.IsNil)
570
571                 rdr, err := bucket.GetReader(objname)
572                 if strings.HasSuffix(trial.path, "/") && !s.handler.Cluster.Collections.S3FolderObjects {
573                         c.Check(err, check.NotNil)
574                         continue
575                 } else if !c.Check(err, check.IsNil) {
576                         continue
577                 }
578                 buf2, err := ioutil.ReadAll(rdr)
579                 c.Check(err, check.IsNil)
580                 c.Check(buf2, check.HasLen, len(buf))
581                 c.Check(bytes.Equal(buf, buf2), check.Equals, true)
582
583                 // Check that the change is immediately visible via
584                 // (non-S3) webdav request.
585                 _, resp := s.do("GET", "http://"+collUUID+".keep-web.example/"+trial.path, arvadostest.ActiveTokenV2, nil)
586                 c.Check(resp.Code, check.Equals, http.StatusOK)
587                 if !strings.HasSuffix(trial.path, "/") {
588                         c.Check(resp.Body.Len(), check.Equals, trial.size)
589                 }
590         }
591 }
592
593 func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
594         stage := s.s3setup(c)
595         defer stage.teardown(c)
596         bucket := stage.projbucket
597
598         for _, trial := range []struct {
599                 path        string
600                 size        int
601                 contentType string
602         }{
603                 {
604                         path:        "newfile",
605                         size:        1234,
606                         contentType: "application/octet-stream",
607                 }, {
608                         path:        "newdir/newfile",
609                         size:        1234,
610                         contentType: "application/octet-stream",
611                 }, {
612                         path:        "newdir2/",
613                         size:        0,
614                         contentType: "application/x-directory",
615                 },
616         } {
617                 c.Logf("=== %v", trial)
618
619                 _, err := bucket.GetReader(trial.path)
620                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
621                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
622                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
623
624                 buf := make([]byte, trial.size)
625                 rand.Read(buf)
626
627                 err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
628                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 400)
629                 c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`)
630                 c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid (argument|operation)`)
631
632                 _, err = bucket.GetReader(trial.path)
633                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
634                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
635                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
636         }
637 }
638
639 func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
640         stage := s.s3setup(c)
641         defer stage.teardown(c)
642         s.testS3DeleteObject(c, stage.collbucket, "")
643 }
644 func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
645         stage := s.s3setup(c)
646         defer stage.teardown(c)
647         s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
648 }
649 func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
650         s.handler.Cluster.Collections.S3FolderObjects = true
651         for _, trial := range []struct {
652                 path string
653         }{
654                 {"/"},
655                 {"nonexistentfile"},
656                 {"emptyfile"},
657                 {"sailboat.txt"},
658                 {"sailboat.txt/"},
659                 {"emptydir"},
660                 {"emptydir/"},
661         } {
662                 objname := prefix + trial.path
663                 comment := check.Commentf("objname %q", objname)
664
665                 err := bucket.Del(objname)
666                 if trial.path == "/" {
667                         c.Check(err, check.NotNil)
668                         continue
669                 }
670                 c.Check(err, check.IsNil, comment)
671                 _, err = bucket.GetReader(objname)
672                 c.Check(err, check.NotNil, comment)
673         }
674 }
675
676 func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
677         stage := s.s3setup(c)
678         defer stage.teardown(c)
679         s.testS3PutObjectFailure(c, stage.collbucket, "")
680 }
681 func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
682         stage := s.s3setup(c)
683         defer stage.teardown(c)
684         s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
685 }
686 func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
687         s.handler.Cluster.Collections.S3FolderObjects = false
688
689         var wg sync.WaitGroup
690         for _, trial := range []struct {
691                 path string
692         }{
693                 {
694                         path: "emptyfile/newname", // emptyfile exists, see s3setup()
695                 }, {
696                         path: "emptyfile/", // emptyfile exists, see s3setup()
697                 }, {
698                         path: "emptydir", // dir already exists, see s3setup()
699                 }, {
700                         path: "emptydir/",
701                 }, {
702                         path: "emptydir//",
703                 }, {
704                         path: "newdir/",
705                 }, {
706                         path: "newdir//",
707                 }, {
708                         path: "/",
709                 }, {
710                         path: "//",
711                 }, {
712                         path: "",
713                 },
714         } {
715                 trial := trial
716                 wg.Add(1)
717                 go func() {
718                         defer wg.Done()
719                         c.Logf("=== %v", trial)
720
721                         objname := prefix + trial.path
722
723                         buf := make([]byte, 1234)
724                         rand.Read(buf)
725
726                         err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
727                         if !c.Check(err, check.ErrorMatches, `(invalid object name.*|open ".*" failed.*|object name conflicts with existing object|Missing object name in PUT request.)`, check.Commentf("PUT %q should fail", objname)) {
728                                 return
729                         }
730
731                         if objname != "" && objname != "/" {
732                                 _, err = bucket.GetReader(objname)
733                                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
734                                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
735                                 c.Check(err, check.ErrorMatches, `The specified key does not exist.`, check.Commentf("GET %q should return 404", objname))
736                         }
737                 }()
738         }
739         wg.Wait()
740 }
741
742 func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
743         fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
744         c.Assert(err, check.IsNil)
745         for d := 0; d < dirs; d++ {
746                 dir := fmt.Sprintf("dir%d", d)
747                 c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
748                 for i := 0; i < filesPerDir; i++ {
749                         f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
750                         c.Assert(err, check.IsNil)
751                         c.Assert(f.Close(), check.IsNil)
752                 }
753         }
754         c.Assert(fs.Sync(), check.IsNil)
755 }
756
757 func (s *IntegrationSuite) sign(c *check.C, req *http.Request, key, secret string) {
758         scope := "20200202/zzzzz/service/aws4_request"
759         signedHeaders := "date"
760         req.Header.Set("Date", time.Now().UTC().Format(time.RFC1123))
761         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, req)
762         c.Assert(err, check.IsNil)
763         sig, err := s3signature(secret, scope, signedHeaders, stringToSign)
764         c.Assert(err, check.IsNil)
765         req.Header.Set("Authorization", s3SignAlgorithm+" Credential="+key+"/"+scope+", SignedHeaders="+signedHeaders+", Signature="+sig)
766 }
767
768 func (s *IntegrationSuite) TestS3VirtualHostStyleRequests(c *check.C) {
769         stage := s.s3setup(c)
770         defer stage.teardown(c)
771         for _, trial := range []struct {
772                 url            string
773                 method         string
774                 body           string
775                 responseCode   int
776                 responseRegexp []string
777                 checkEtag      bool
778         }{
779                 {
780                         url:            "https://" + stage.collbucket.Name + ".example.com/",
781                         method:         "GET",
782                         responseCode:   http.StatusOK,
783                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
784                 },
785                 {
786                         url:            "https://" + strings.Replace(stage.coll.PortableDataHash, "+", "-", -1) + ".example.com/",
787                         method:         "GET",
788                         responseCode:   http.StatusOK,
789                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
790                 },
791                 {
792                         url:            "https://" + stage.projbucket.Name + ".example.com/?prefix=" + stage.coll.Name + "/&delimiter=/",
793                         method:         "GET",
794                         responseCode:   http.StatusOK,
795                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
796                 },
797                 {
798                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/sailboat.txt",
799                         method:         "GET",
800                         responseCode:   http.StatusOK,
801                         responseRegexp: []string{`⛵\n`},
802                         checkEtag:      true,
803                 },
804                 {
805                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
806                         method:       "PUT",
807                         body:         "boop",
808                         responseCode: http.StatusOK,
809                 },
810                 {
811                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
812                         method:         "GET",
813                         responseCode:   http.StatusOK,
814                         responseRegexp: []string{`boop`},
815                         checkEtag:      true,
816                 },
817                 {
818                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
819                         method:       "GET",
820                         responseCode: http.StatusNotFound,
821                 },
822                 {
823                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
824                         method:       "PUT",
825                         body:         "boop",
826                         responseCode: http.StatusOK,
827                 },
828                 {
829                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
830                         method:         "GET",
831                         responseCode:   http.StatusOK,
832                         responseRegexp: []string{`boop`},
833                         checkEtag:      true,
834                 },
835         } {
836                 url, err := url.Parse(trial.url)
837                 c.Assert(err, check.IsNil)
838                 req, err := http.NewRequest(trial.method, url.String(), bytes.NewReader([]byte(trial.body)))
839                 c.Assert(err, check.IsNil)
840                 s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken)
841                 rr := httptest.NewRecorder()
842                 s.handler.ServeHTTP(rr, req)
843                 resp := rr.Result()
844                 c.Check(resp.StatusCode, check.Equals, trial.responseCode)
845                 body, err := ioutil.ReadAll(resp.Body)
846                 c.Assert(err, check.IsNil)
847                 for _, re := range trial.responseRegexp {
848                         c.Check(string(body), check.Matches, re)
849                 }
850                 if trial.checkEtag {
851                         c.Check(resp.Header.Get("Etag"), check.Matches, `"[\da-f]{32}\+\d+"`)
852                 }
853         }
854 }
855
856 func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) {
857         stage := s.s3setup(c)
858         defer stage.teardown(c)
859         for _, trial := range []struct {
860                 rawPath        string
861                 normalizedPath string
862         }{
863                 {"/foo", "/foo"},                           // boring case
864                 {"/foo%5fbar", "/foo_bar"},                 // _ must not be escaped
865                 {"/foo%2fbar", "/foo/bar"},                 // / must not be escaped
866                 {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
867                 {"/foo%5bbar", "/foo%5Bbar"},               // %XX must be uppercase
868                 {"//foo///.bar", "/foo/.bar"},              // "//" and "///" must be squashed to "/"
869         } {
870                 c.Logf("trial %q", trial)
871
872                 date := time.Now().UTC().Format("20060102T150405Z")
873                 scope := "20200202/zzzzz/S3/aws4_request"
874                 canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
875                 c.Logf("canonicalRequest %q", canonicalRequest)
876                 expect := fmt.Sprintf("%s\n%s\n%s\n%s", s3SignAlgorithm, date, scope, hashdigest(sha256.New(), canonicalRequest))
877                 c.Logf("expected stringToSign %q", expect)
878
879                 req, err := http.NewRequest("GET", "https://host.example.com"+trial.rawPath, nil)
880                 req.Header.Set("X-Amz-Date", date)
881                 req.Host = "host.example.com"
882                 c.Assert(err, check.IsNil)
883
884                 obtained, err := s3stringToSign(s3SignAlgorithm, scope, "host", req)
885                 if !c.Check(err, check.IsNil) {
886                         continue
887                 }
888                 c.Check(obtained, check.Equals, expect)
889         }
890 }
891
892 func (s *IntegrationSuite) TestS3GetBucketLocation(c *check.C) {
893         stage := s.s3setup(c)
894         defer stage.teardown(c)
895         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
896                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
897                 c.Check(err, check.IsNil)
898                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
899                 req.URL.RawQuery = "location"
900                 resp, err := http.DefaultClient.Do(req)
901                 c.Assert(err, check.IsNil)
902                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
903                 buf, err := ioutil.ReadAll(resp.Body)
904                 c.Assert(err, check.IsNil)
905                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<LocationConstraint><LocationConstraint xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">zzzzz</LocationConstraint></LocationConstraint>\n")
906         }
907 }
908
909 func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
910         stage := s.s3setup(c)
911         defer stage.teardown(c)
912         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
913                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
914                 c.Check(err, check.IsNil)
915                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
916                 req.URL.RawQuery = "versioning"
917                 resp, err := http.DefaultClient.Do(req)
918                 c.Assert(err, check.IsNil)
919                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
920                 buf, err := ioutil.ReadAll(resp.Body)
921                 c.Assert(err, check.IsNil)
922                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
923         }
924 }
925
926 func (s *IntegrationSuite) TestS3UnsupportedAPIs(c *check.C) {
927         stage := s.s3setup(c)
928         defer stage.teardown(c)
929         for _, trial := range []struct {
930                 method   string
931                 path     string
932                 rawquery string
933         }{
934                 {"GET", "/", "acl&versionId=1234"},    // GetBucketAcl
935                 {"GET", "/foo", "acl&versionId=1234"}, // GetObjectAcl
936                 {"PUT", "/", "acl"},                   // PutBucketAcl
937                 {"PUT", "/foo", "acl"},                // PutObjectAcl
938                 {"DELETE", "/", "tagging"},            // DeleteBucketTagging
939                 {"DELETE", "/foo", "tagging"},         // DeleteObjectTagging
940         } {
941                 for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
942                         c.Logf("trial %v bucket %v", trial, bucket)
943                         req, err := http.NewRequest(trial.method, bucket.URL(trial.path), nil)
944                         c.Check(err, check.IsNil)
945                         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
946                         req.URL.RawQuery = trial.rawquery
947                         resp, err := http.DefaultClient.Do(req)
948                         c.Assert(err, check.IsNil)
949                         c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
950                         buf, err := ioutil.ReadAll(resp.Body)
951                         c.Assert(err, check.IsNil)
952                         c.Check(string(buf), check.Matches, "(?ms).*InvalidRequest.*API not supported.*")
953                 }
954         }
955 }
956
957 // If there are no CommonPrefixes entries, the CommonPrefixes XML tag
958 // should not appear at all.
959 func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) {
960         stage := s.s3setup(c)
961         defer stage.teardown(c)
962
963         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
964         c.Assert(err, check.IsNil)
965         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
966         req.URL.RawQuery = "prefix=asdfasdfasdf&delimiter=/"
967         resp, err := http.DefaultClient.Do(req)
968         c.Assert(err, check.IsNil)
969         buf, err := ioutil.ReadAll(resp.Body)
970         c.Assert(err, check.IsNil)
971         c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`)
972 }
973
974 // If there is no delimiter in the request, or the results are not
975 // truncated, the NextMarker XML tag should not appear in the response
976 // body.
977 func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) {
978         stage := s.s3setup(c)
979         defer stage.teardown(c)
980
981         for _, query := range []string{"prefix=e&delimiter=/", ""} {
982                 req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
983                 c.Assert(err, check.IsNil)
984                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
985                 req.URL.RawQuery = query
986                 resp, err := http.DefaultClient.Do(req)
987                 c.Assert(err, check.IsNil)
988                 buf, err := ioutil.ReadAll(resp.Body)
989                 c.Assert(err, check.IsNil)
990                 c.Check(string(buf), check.Not(check.Matches), `(?ms).*NextMarker.*`)
991         }
992 }
993
994 // List response should include KeyCount field.
995 func (s *IntegrationSuite) TestS3ListKeyCount(c *check.C) {
996         stage := s.s3setup(c)
997         defer stage.teardown(c)
998
999         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
1000         c.Assert(err, check.IsNil)
1001         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
1002         req.URL.RawQuery = "prefix=&delimiter=/"
1003         resp, err := http.DefaultClient.Do(req)
1004         c.Assert(err, check.IsNil)
1005         buf, err := ioutil.ReadAll(resp.Body)
1006         c.Assert(err, check.IsNil)
1007         c.Check(string(buf), check.Matches, `(?ms).*<KeyCount>2</KeyCount>.*`)
1008 }
1009
1010 func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
1011         stage := s.s3setup(c)
1012         defer stage.teardown(c)
1013
1014         var markers int
1015         for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
1016                 dirs := 2000
1017                 filesPerDir := 2
1018                 stage.writeBigDirs(c, dirs, filesPerDir)
1019                 // Total # objects is:
1020                 //                 2 file entries from s3setup (emptyfile and sailboat.txt)
1021                 //                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
1022                 //             +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled)
1023                 // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
1024                 s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
1025                 s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
1026                 s.testS3List(c, stage.collbucket, "", 51, markers+2+(filesPerDir+markers)*dirs)
1027                 s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
1028         }
1029 }
1030 func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
1031         c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.handler.Cluster.Collections.S3FolderObjects)
1032         expectPageSize := pageSize
1033         if expectPageSize > 1000 {
1034                 expectPageSize = 1000
1035         }
1036         gotKeys := map[string]s3.Key{}
1037         nextMarker := ""
1038         pages := 0
1039         for {
1040                 resp, err := bucket.List(prefix, "", nextMarker, pageSize)
1041                 if !c.Check(err, check.IsNil) {
1042                         break
1043                 }
1044                 c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
1045                 if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
1046                         break
1047                 }
1048                 for _, key := range resp.Contents {
1049                         if _, dup := gotKeys[key.Key]; dup {
1050                                 c.Errorf("got duplicate key %q on page %d", key.Key, pages)
1051                         }
1052                         gotKeys[key.Key] = key
1053                         if strings.Contains(key.Key, "sailboat.txt") {
1054                                 c.Check(key.Size, check.Equals, int64(4))
1055                         }
1056                 }
1057                 if !resp.IsTruncated {
1058                         c.Check(resp.NextMarker, check.Equals, "")
1059                         break
1060                 }
1061                 if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
1062                         break
1063                 }
1064                 nextMarker = resp.NextMarker
1065         }
1066         if !c.Check(len(gotKeys), check.Equals, expectFiles) {
1067                 var sorted []string
1068                 for k := range gotKeys {
1069                         sorted = append(sorted, k)
1070                 }
1071                 sort.Strings(sorted)
1072                 for _, k := range sorted {
1073                         c.Logf("got %s", k)
1074                 }
1075         }
1076 }
1077
1078 func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
1079         for _, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
1080                 s.testS3CollectionListRollup(c)
1081         }
1082 }
1083
1084 func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
1085         stage := s.s3setup(c)
1086         defer stage.teardown(c)
1087
1088         dirs := 2
1089         filesPerDir := 500
1090         stage.writeBigDirs(c, dirs, filesPerDir)
1091         err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
1092         c.Assert(err, check.IsNil)
1093         var allfiles []string
1094         for marker := ""; ; {
1095                 resp, err := stage.collbucket.List("", "", marker, 20000)
1096                 c.Check(err, check.IsNil)
1097                 for _, key := range resp.Contents {
1098                         if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
1099                                 allfiles = append(allfiles, key.Key)
1100                         }
1101                 }
1102                 marker = resp.NextMarker
1103                 if marker == "" {
1104                         break
1105                 }
1106         }
1107         markers := 0
1108         if s.handler.Cluster.Collections.S3FolderObjects {
1109                 markers = 1
1110         }
1111         c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
1112
1113         gotDirMarker := map[string]bool{}
1114         for _, name := range allfiles {
1115                 isDirMarker := strings.HasSuffix(name, "/")
1116                 if markers == 0 {
1117                         c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
1118                 } else if isDirMarker {
1119                         gotDirMarker[name] = true
1120                 } else if i := strings.LastIndex(name, "/"); i >= 0 {
1121                         c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
1122                         gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
1123                 }
1124         }
1125
1126         for _, trial := range []struct {
1127                 prefix    string
1128                 delimiter string
1129                 marker    string
1130         }{
1131                 {"", "", ""},
1132                 {"di", "/", ""},
1133                 {"di", "r", ""},
1134                 {"di", "n", ""},
1135                 {"dir0", "/", ""},
1136                 {"dir0/", "/", ""},
1137                 {"dir0/f", "/", ""},
1138                 {"dir0", "", ""},
1139                 {"dir0/", "", ""},
1140                 {"dir0/f", "", ""},
1141                 {"dir0", "/", "dir0/file14.txt"},       // one commonprefix, "dir0/"
1142                 {"dir0", "/", "dir0/zzzzfile.txt"},     // no commonprefixes
1143                 {"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
1144                 {"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
1145                 {"", "", "dir1/file498.txt"},           // last page of results
1146                 {"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
1147                 {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
1148                 {"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
1149                 {"dir2", "/", ""},                      // prefix "dir2" does not exist
1150                 {"", "/", ""},
1151         } {
1152                 c.Logf("\n\n=== trial %+v markers=%d", trial, markers)
1153
1154                 maxKeys := 20
1155                 resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
1156                 c.Check(err, check.IsNil)
1157                 if resp.IsTruncated && trial.delimiter == "" {
1158                         // goamz List method fills in the missing
1159                         // NextMarker field if resp.IsTruncated, so
1160                         // now we can't really tell whether it was
1161                         // sent by the server or by goamz. In cases
1162                         // where it should be empty but isn't, assume
1163                         // it's goamz's fault.
1164                         resp.NextMarker = ""
1165                 }
1166
1167                 var expectKeys []string
1168                 var expectPrefixes []string
1169                 var expectNextMarker string
1170                 var expectTruncated bool
1171                 for _, key := range allfiles {
1172                         full := len(expectKeys)+len(expectPrefixes) >= maxKeys
1173                         if !strings.HasPrefix(key, trial.prefix) || key <= trial.marker {
1174                                 continue
1175                         } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
1176                                 prefix := key[:len(trial.prefix)+idx+1]
1177                                 if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
1178                                         // same prefix as previous key
1179                                 } else if full {
1180                                         expectTruncated = true
1181                                 } else {
1182                                         expectPrefixes = append(expectPrefixes, prefix)
1183                                         expectNextMarker = prefix
1184                                 }
1185                         } else if full {
1186                                 expectTruncated = true
1187                                 break
1188                         } else {
1189                                 expectKeys = append(expectKeys, key)
1190                                 if trial.delimiter != "" {
1191                                         expectNextMarker = key
1192                                 }
1193                         }
1194                 }
1195                 if !expectTruncated {
1196                         expectNextMarker = ""
1197                 }
1198
1199                 var gotKeys []string
1200                 for _, key := range resp.Contents {
1201                         gotKeys = append(gotKeys, key.Key)
1202                 }
1203                 var gotPrefixes []string
1204                 for _, prefix := range resp.CommonPrefixes {
1205                         gotPrefixes = append(gotPrefixes, prefix)
1206                 }
1207                 commentf := check.Commentf("trial %+v markers=%d", trial, markers)
1208                 c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
1209                 c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
1210                 c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
1211                 c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
1212                 c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
1213         }
1214 }
1215
1216 func (s *IntegrationSuite) TestS3ListObjectsV2ManySubprojects(c *check.C) {
1217         stage := s.s3setup(c)
1218         defer stage.teardown(c)
1219         projects := 50
1220         collectionsPerProject := 2
1221         for i := 0; i < projects; i++ {
1222                 var subproj arvados.Group
1223                 err := stage.arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
1224                         "group": map[string]interface{}{
1225                                 "owner_uuid":  stage.subproj.UUID,
1226                                 "group_class": "project",
1227                                 "name":        fmt.Sprintf("keep-web s3 test subproject %d", i),
1228                         },
1229                 })
1230                 c.Assert(err, check.IsNil)
1231                 for j := 0; j < collectionsPerProject; j++ {
1232                         err = stage.arv.RequestAndDecode(nil, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
1233                                 "owner_uuid":    subproj.UUID,
1234                                 "name":          fmt.Sprintf("keep-web s3 test collection %d", j),
1235                                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
1236                         }})
1237                         c.Assert(err, check.IsNil)
1238                 }
1239         }
1240         c.Logf("setup complete")
1241
1242         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1243                 Region:           aws_aws.String("auto"),
1244                 Endpoint:         aws_aws.String(s.testServer.URL),
1245                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1246                 S3ForcePathStyle: aws_aws.Bool(true),
1247         }))
1248         client := aws_s3.New(sess)
1249         ctx := context.Background()
1250         params := aws_s3.ListObjectsV2Input{
1251                 Bucket:    aws_aws.String(stage.proj.UUID),
1252                 Delimiter: aws_aws.String("/"),
1253                 Prefix:    aws_aws.String("keep-web s3 test subproject/"),
1254                 MaxKeys:   aws_aws.Int64(int64(projects / 2)),
1255         }
1256         for page := 1; ; page++ {
1257                 t0 := time.Now()
1258                 result, err := client.ListObjectsV2WithContext(ctx, &params)
1259                 if !c.Check(err, check.IsNil) {
1260                         break
1261                 }
1262                 c.Logf("got page %d in %v with len(Contents) == %d, len(CommonPrefixes) == %d", page, time.Since(t0), len(result.Contents), len(result.CommonPrefixes))
1263                 if !*result.IsTruncated {
1264                         break
1265                 }
1266                 params.ContinuationToken = result.NextContinuationToken
1267                 *params.MaxKeys = *params.MaxKeys/2 + 1
1268         }
1269 }
1270
1271 func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) {
1272         stage := s.s3setup(c)
1273         defer stage.teardown(c)
1274         dirs := 2
1275         filesPerDir := 40
1276         stage.writeBigDirs(c, dirs, filesPerDir)
1277
1278         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1279                 Region:           aws_aws.String("auto"),
1280                 Endpoint:         aws_aws.String(s.testServer.URL),
1281                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1282                 S3ForcePathStyle: aws_aws.Bool(true),
1283         }))
1284
1285         stringOrNil := func(s string) *string {
1286                 if s == "" {
1287                         return nil
1288                 } else {
1289                         return &s
1290                 }
1291         }
1292
1293         client := aws_s3.New(sess)
1294         ctx := context.Background()
1295
1296         for _, trial := range []struct {
1297                 prefix               string
1298                 delimiter            string
1299                 startAfter           string
1300                 maxKeys              int
1301                 expectKeys           int
1302                 expectCommonPrefixes map[string]bool
1303         }{
1304                 {
1305                         // Expect {filesPerDir plus the dir itself}
1306                         // for each dir, plus emptydir, emptyfile, and
1307                         // sailboat.txt.
1308                         expectKeys: (filesPerDir+1)*dirs + 3,
1309                 },
1310                 {
1311                         maxKeys:    15,
1312                         expectKeys: (filesPerDir+1)*dirs + 3,
1313                 },
1314                 {
1315                         startAfter: "dir0/z",
1316                         maxKeys:    15,
1317                         // Expect {filesPerDir plus the dir itself}
1318                         // for each dir except dir0, plus emptydir,
1319                         // emptyfile, and sailboat.txt.
1320                         expectKeys: (filesPerDir+1)*(dirs-1) + 3,
1321                 },
1322                 {
1323                         maxKeys:              1,
1324                         delimiter:            "/",
1325                         expectKeys:           2, // emptyfile, sailboat.txt
1326                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1327                 },
1328                 {
1329                         startAfter:           "dir0/z",
1330                         maxKeys:              15,
1331                         delimiter:            "/",
1332                         expectKeys:           2, // emptyfile, sailboat.txt
1333                         expectCommonPrefixes: map[string]bool{"dir1/": true, "emptydir/": true},
1334                 },
1335                 {
1336                         startAfter:           "dir0/file10.txt",
1337                         maxKeys:              15,
1338                         delimiter:            "/",
1339                         expectKeys:           2,
1340                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1341                 },
1342                 {
1343                         startAfter:           "dir0/file10.txt",
1344                         maxKeys:              15,
1345                         prefix:               "d",
1346                         delimiter:            "/",
1347                         expectKeys:           0,
1348                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true},
1349                 },
1350         } {
1351                 c.Logf("[trial %+v]", trial)
1352                 params := aws_s3.ListObjectsV2Input{
1353                         Bucket:     aws_aws.String(stage.collbucket.Name),
1354                         Prefix:     stringOrNil(trial.prefix),
1355                         Delimiter:  stringOrNil(trial.delimiter),
1356                         StartAfter: stringOrNil(trial.startAfter),
1357                         MaxKeys:    aws_aws.Int64(int64(trial.maxKeys)),
1358                 }
1359                 keySeen := map[string]bool{}
1360                 prefixSeen := map[string]bool{}
1361                 for {
1362                         result, err := client.ListObjectsV2WithContext(ctx, &params)
1363                         if !c.Check(err, check.IsNil) {
1364                                 break
1365                         }
1366                         c.Check(result.Name, check.DeepEquals, aws_aws.String(stage.collbucket.Name))
1367                         c.Check(result.Prefix, check.DeepEquals, aws_aws.String(trial.prefix))
1368                         c.Check(result.Delimiter, check.DeepEquals, aws_aws.String(trial.delimiter))
1369                         // The following two fields are expected to be
1370                         // nil (i.e., no tag in XML response) rather
1371                         // than "" when the corresponding request
1372                         // field was empty or nil.
1373                         c.Check(result.StartAfter, check.DeepEquals, stringOrNil(trial.startAfter))
1374                         c.Check(result.ContinuationToken, check.DeepEquals, params.ContinuationToken)
1375
1376                         if trial.maxKeys > 0 {
1377                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(trial.maxKeys)))
1378                                 c.Check(len(result.Contents)+len(result.CommonPrefixes) <= trial.maxKeys, check.Equals, true)
1379                         } else {
1380                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(s3MaxKeys)))
1381                         }
1382
1383                         for _, ent := range result.Contents {
1384                                 c.Assert(ent.Key, check.NotNil)
1385                                 c.Check(*ent.Key > trial.startAfter, check.Equals, true)
1386                                 c.Check(keySeen[*ent.Key], check.Equals, false, check.Commentf("dup key %q", *ent.Key))
1387                                 keySeen[*ent.Key] = true
1388                         }
1389                         for _, ent := range result.CommonPrefixes {
1390                                 c.Assert(ent.Prefix, check.NotNil)
1391                                 c.Check(strings.HasSuffix(*ent.Prefix, trial.delimiter), check.Equals, true, check.Commentf("bad CommonPrefix %q", *ent.Prefix))
1392                                 if strings.HasPrefix(trial.startAfter, *ent.Prefix) {
1393                                         // If we asked for
1394                                         // startAfter=dir0/file10.txt,
1395                                         // we expect dir0/ to be
1396                                         // returned as a common prefix
1397                                 } else {
1398                                         c.Check(*ent.Prefix > trial.startAfter, check.Equals, true)
1399                                 }
1400                                 c.Check(prefixSeen[*ent.Prefix], check.Equals, false, check.Commentf("dup common prefix %q", *ent.Prefix))
1401                                 prefixSeen[*ent.Prefix] = true
1402                         }
1403                         if *result.IsTruncated && c.Check(result.NextContinuationToken, check.Not(check.Equals), "") {
1404                                 params.ContinuationToken = aws_aws.String(*result.NextContinuationToken)
1405                         } else {
1406                                 break
1407                         }
1408                 }
1409                 c.Check(keySeen, check.HasLen, trial.expectKeys)
1410                 c.Check(prefixSeen, check.HasLen, len(trial.expectCommonPrefixes))
1411                 if len(trial.expectCommonPrefixes) > 0 {
1412                         c.Check(prefixSeen, check.DeepEquals, trial.expectCommonPrefixes)
1413                 }
1414         }
1415 }
1416
1417 func (s *IntegrationSuite) TestS3ListObjectsV2EncodingTypeURL(c *check.C) {
1418         stage := s.s3setup(c)
1419         defer stage.teardown(c)
1420         dirs := 2
1421         filesPerDir := 40
1422         stage.writeBigDirs(c, dirs, filesPerDir)
1423
1424         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1425                 Region:           aws_aws.String("auto"),
1426                 Endpoint:         aws_aws.String(s.testServer.URL),
1427                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1428                 S3ForcePathStyle: aws_aws.Bool(true),
1429         }))
1430
1431         client := aws_s3.New(sess)
1432         ctx := context.Background()
1433
1434         result, err := client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1435                 Bucket:       aws_aws.String(stage.collbucket.Name),
1436                 Prefix:       aws_aws.String("dir0/"),
1437                 Delimiter:    aws_aws.String("/"),
1438                 StartAfter:   aws_aws.String("dir0/"),
1439                 EncodingType: aws_aws.String("url"),
1440         })
1441         c.Assert(err, check.IsNil)
1442         c.Check(*result.Prefix, check.Equals, "dir0%2F")
1443         c.Check(*result.Delimiter, check.Equals, "%2F")
1444         c.Check(*result.StartAfter, check.Equals, "dir0%2F")
1445         for _, ent := range result.Contents {
1446                 c.Check(*ent.Key, check.Matches, "dir0%2F.*")
1447         }
1448         result, err = client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1449                 Bucket:       aws_aws.String(stage.collbucket.Name),
1450                 Delimiter:    aws_aws.String("/"),
1451                 EncodingType: aws_aws.String("url"),
1452         })
1453         c.Assert(err, check.IsNil)
1454         c.Check(*result.Delimiter, check.Equals, "%2F")
1455         c.Check(result.CommonPrefixes, check.HasLen, dirs+1)
1456         for _, ent := range result.CommonPrefixes {
1457                 c.Check(*ent.Prefix, check.Matches, ".*%2F")
1458         }
1459 }
1460
1461 // TestS3cmd checks compatibility with the s3cmd command line tool, if
1462 // it's installed. As of Debian buster, s3cmd is only in backports, so
1463 // `arvados-server install` don't install it, and this test skips if
1464 // it's not installed.
1465 func (s *IntegrationSuite) TestS3cmd(c *check.C) {
1466         if _, err := exec.LookPath("s3cmd"); err != nil {
1467                 c.Skip("s3cmd not found")
1468                 return
1469         }
1470
1471         stage := s.s3setup(c)
1472         defer stage.teardown(c)
1473
1474         cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection)
1475         buf, err := cmd.CombinedOutput()
1476         c.Check(err, check.IsNil)
1477         c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
1478
1479         // This tests whether s3cmd's path normalization agrees with
1480         // keep-web's signature verification wrt chars like "|"
1481         // (neither reserved nor unreserved) and "," (not normally
1482         // percent-encoded in a path).
1483         tmpfile := c.MkDir() + "/dstfile"
1484         cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar", tmpfile)
1485         buf, err = cmd.CombinedOutput()
1486         c.Check(err, check.NotNil)
1487         // As of commit b7520e5c25e1bf25c1a8bf5aa2eadb299be8f606
1488         // (between debian bullseye and bookworm versions), s3cmd
1489         // started catching the NoSuchKey error code and replacing it
1490         // with "Source object '%s' does not exist.".
1491         c.Check(string(buf), check.Matches, `(?ms).*(NoSuchKey|Source object.*does not exist).*\n`)
1492
1493         tmpfile = c.MkDir() + "/foo"
1494         cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo", tmpfile)
1495         buf, err = cmd.CombinedOutput()
1496         c.Logf("%s", buf)
1497         if c.Check(err, check.IsNil) {
1498                 checkcontent, err := os.ReadFile(tmpfile)
1499                 c.Check(err, check.IsNil)
1500                 c.Check(string(checkcontent), check.Equals, "foo")
1501         }
1502 }
1503
1504 func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
1505         stage := s.s3setup(c)
1506         defer stage.teardown(c)
1507
1508         hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt")
1509         c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
1510         c.Check(body, check.Equals, "⛵\n")
1511 }