]> git.arvados.org - arvados.git/blob - services/keep-web/s3_test.go
Merge branch '22581-api-service-support' refs #22581
[arvados.git] / services / keep-web / s3_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "crypto/rand"
11         "crypto/sha256"
12         "fmt"
13         "io"
14         "io/ioutil"
15         "mime"
16         "net/http"
17         "net/http/httptest"
18         "net/url"
19         "os"
20         "os/exec"
21         "sort"
22         "strings"
23         "sync"
24         "time"
25
26         "git.arvados.org/arvados.git/sdk/go/arvados"
27         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
28         "git.arvados.org/arvados.git/sdk/go/arvadostest"
29         "git.arvados.org/arvados.git/sdk/go/keepclient"
30         "github.com/AdRoll/goamz/aws"
31         "github.com/AdRoll/goamz/s3"
32         aws_aws "github.com/aws/aws-sdk-go/aws"
33         aws_credentials "github.com/aws/aws-sdk-go/aws/credentials"
34         aws_session "github.com/aws/aws-sdk-go/aws/session"
35         aws_s3 "github.com/aws/aws-sdk-go/service/s3"
36         check "gopkg.in/check.v1"
37 )
38
39 type CachedS3SecretSuite struct{}
40
41 var _ = check.Suite(&CachedS3SecretSuite{})
42
43 func (s *CachedS3SecretSuite) activeACA(expiresAt time.Time) *arvados.APIClientAuthorization {
44         return &arvados.APIClientAuthorization{
45                 UUID:      arvadostest.ActiveTokenUUID,
46                 APIToken:  arvadostest.ActiveToken,
47                 ExpiresAt: expiresAt,
48         }
49 }
50
51 func (s *CachedS3SecretSuite) TestNewCachedS3SecretExpiresBeforeTTL(c *check.C) {
52         expected := time.Unix(1<<29, 0)
53         aca := s.activeACA(expected)
54         actual := newCachedS3Secret(aca, time.Unix(1<<30, 0))
55         c.Check(actual.expiry, check.Equals, expected)
56 }
57
58 func (s *CachedS3SecretSuite) TestNewCachedS3SecretExpiresAfterTTL(c *check.C) {
59         expected := time.Unix(1<<29, 0)
60         aca := s.activeACA(time.Unix(1<<30, 0))
61         actual := newCachedS3Secret(aca, expected)
62         c.Check(actual.expiry, check.Equals, expected)
63 }
64
65 func (s *CachedS3SecretSuite) TestNewCachedS3SecretWithoutExpiry(c *check.C) {
66         expected := time.Unix(1<<29, 0)
67         aca := s.activeACA(time.Time{})
68         actual := newCachedS3Secret(aca, expected)
69         c.Check(actual.expiry, check.Equals, expected)
70 }
71
72 func (s *CachedS3SecretSuite) cachedSecretWithExpiry(expiry time.Time) *cachedS3Secret {
73         return &cachedS3Secret{
74                 auth:   s.activeACA(expiry),
75                 expiry: expiry,
76         }
77 }
78
79 func (s *CachedS3SecretSuite) TestIsValidAtEmpty(c *check.C) {
80         cache := &cachedS3Secret{}
81         c.Check(cache.isValidAt(time.Unix(0, 0)), check.Equals, false)
82         c.Check(cache.isValidAt(time.Unix(1<<31, 0)), check.Equals, false)
83 }
84
85 func (s *CachedS3SecretSuite) TestIsValidAtNoAuth(c *check.C) {
86         cache := &cachedS3Secret{expiry: time.Unix(3, 0)}
87         c.Check(cache.isValidAt(time.Unix(2, 0)), check.Equals, false)
88         c.Check(cache.isValidAt(time.Unix(4, 0)), check.Equals, false)
89 }
90
91 func (s *CachedS3SecretSuite) TestIsValidAtNoExpiry(c *check.C) {
92         cache := &cachedS3Secret{auth: s.activeACA(time.Unix(3, 0))}
93         c.Check(cache.isValidAt(time.Unix(2, 0)), check.Equals, false)
94         c.Check(cache.isValidAt(time.Unix(4, 0)), check.Equals, false)
95 }
96
97 func (s *CachedS3SecretSuite) TestIsValidAtTimeAfterExpiry(c *check.C) {
98         expiry := time.Unix(10, 0)
99         cache := s.cachedSecretWithExpiry(expiry)
100         c.Check(cache.isValidAt(expiry), check.Equals, false)
101         c.Check(cache.isValidAt(time.Unix(1<<25, 0)), check.Equals, false)
102         c.Check(cache.isValidAt(time.Unix(1<<30, 0)), check.Equals, false)
103 }
104
105 func (s *CachedS3SecretSuite) TestIsValidAtTimeBeforeExpiry(c *check.C) {
106         cache := s.cachedSecretWithExpiry(time.Unix(1<<30, 0))
107         c.Check(cache.isValidAt(time.Unix(1<<25, 0)), check.Equals, true)
108         c.Check(cache.isValidAt(time.Unix(1<<27, 0)), check.Equals, true)
109         c.Check(cache.isValidAt(time.Unix(1<<29, 0)), check.Equals, true)
110 }
111
112 func (s *CachedS3SecretSuite) TestIsValidAtZeroTime(c *check.C) {
113         cache := s.cachedSecretWithExpiry(time.Unix(10, 0))
114         c.Check(cache.isValidAt(time.Time{}), check.Equals, false)
115 }
116
117 type s3stage struct {
118         arv        *arvados.Client
119         ac         *arvadosclient.ArvadosClient
120         kc         *keepclient.KeepClient
121         proj       arvados.Group
122         projbucket *s3.Bucket
123         subproj    arvados.Group
124         coll       arvados.Collection
125         collbucket *s3.Bucket
126 }
127
128 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
129         var proj, subproj arvados.Group
130         var coll arvados.Collection
131         arv := arvados.NewClientFromEnv()
132         arv.AuthToken = arvadostest.ActiveToken
133         err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
134                 "group": map[string]interface{}{
135                         "group_class": "project",
136                         "name":        "keep-web s3 test",
137                         "properties": map[string]interface{}{
138                                 "project-properties-key": "project properties value",
139                         },
140                 },
141                 "ensure_unique_name": true,
142         })
143         c.Assert(err, check.IsNil)
144         err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
145                 "group": map[string]interface{}{
146                         "owner_uuid":  proj.UUID,
147                         "group_class": "project",
148                         "name":        "keep-web s3 test subproject",
149                         "properties": map[string]interface{}{
150                                 "subproject_properties_key": "subproject properties value",
151                                 "invalid header key":        "this value will not be returned because key contains spaces",
152                         },
153                 },
154         })
155         c.Assert(err, check.IsNil)
156         err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
157                 "owner_uuid":    proj.UUID,
158                 "name":          "keep-web s3 test collection",
159                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
160                 "properties": map[string]interface{}{
161                         "string":   "string value",
162                         "array":    []string{"element1", "element2"},
163                         "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
164                         "nonascii": "⛵",
165                         "newline":  "foo\r\nX-Bad: header",
166                         // This key cannot be expressed as a MIME
167                         // header key, so it will be silently skipped
168                         // (see "Inject" in PropertiesAsMetadata test)
169                         "a: a\r\nInject": "bogus",
170                 },
171         }})
172         c.Assert(err, check.IsNil)
173         ac, err := arvadosclient.New(arv)
174         c.Assert(err, check.IsNil)
175         kc, err := keepclient.MakeKeepClient(ac)
176         c.Assert(err, check.IsNil)
177         fs, err := coll.FileSystem(arv, kc)
178         c.Assert(err, check.IsNil)
179         f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
180         c.Assert(err, check.IsNil)
181         _, err = f.Write([]byte("⛵\n"))
182         c.Assert(err, check.IsNil)
183         err = f.Close()
184         c.Assert(err, check.IsNil)
185         err = fs.Sync()
186         c.Assert(err, check.IsNil)
187         err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
188         c.Assert(err, check.IsNil)
189
190         auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour))
191         region := aws.Region{
192                 Name:       "zzzzz",
193                 S3Endpoint: s.testServer.URL,
194         }
195         client := s3.New(*auth, region)
196         client.Signature = aws.V4Signature
197         return s3stage{
198                 arv:  arv,
199                 ac:   ac,
200                 kc:   kc,
201                 proj: proj,
202                 projbucket: &s3.Bucket{
203                         S3:   client,
204                         Name: proj.UUID,
205                 },
206                 subproj: subproj,
207                 coll:    coll,
208                 collbucket: &s3.Bucket{
209                         S3:   client,
210                         Name: coll.UUID,
211                 },
212         }
213 }
214
215 func (stage s3stage) teardown(c *check.C) {
216         if stage.coll.UUID != "" {
217                 err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
218                 c.Check(err, check.IsNil)
219         }
220         if stage.proj.UUID != "" {
221                 err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
222                 c.Check(err, check.IsNil)
223         }
224 }
225
226 func (s *IntegrationSuite) TestS3Signatures(c *check.C) {
227         stage := s.s3setup(c)
228         defer stage.teardown(c)
229
230         bucket := stage.collbucket
231         for _, trial := range []struct {
232                 success   bool
233                 signature int
234                 accesskey string
235                 secretkey string
236         }{
237                 {true, aws.V2Signature, arvadostest.ActiveToken, "none"},
238                 {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"},
239                 {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"},
240                 {false, aws.V2Signature, "none", "none"},
241                 {false, aws.V2Signature, "none", arvadostest.ActiveToken},
242
243                 {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken},
244                 {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken},
245                 {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2)},
246                 {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)},
247                 {false, aws.V4Signature, arvadostest.ActiveToken, ""},
248                 {false, aws.V4Signature, arvadostest.ActiveToken, "none"},
249                 {false, aws.V4Signature, "none", arvadostest.ActiveToken},
250                 {false, aws.V4Signature, "none", "none"},
251         } {
252                 c.Logf("%#v", trial)
253                 bucket.S3.Auth = *(aws.NewAuth(trial.accesskey, trial.secretkey, "", time.Now().Add(time.Hour)))
254                 bucket.S3.Signature = trial.signature
255                 _, err := bucket.GetReader("emptyfile")
256                 if trial.success {
257                         c.Check(err, check.IsNil)
258                 } else {
259                         c.Check(err, check.NotNil)
260                 }
261         }
262 }
263
264 func (s *IntegrationSuite) TestS3SecretCacheUpdates(c *check.C) {
265         stage := s.s3setup(c)
266         defer stage.teardown(c)
267         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
268         c.Assert(err, check.IsNil)
269
270         for trialName, trialAuth := range map[string]string{
271                 "v1 token":                    arvadostest.ActiveToken,
272                 "token UUID":                  arvadostest.ActiveTokenUUID,
273                 "v2 token query escaped":      url.QueryEscape(arvadostest.ActiveTokenV2),
274                 "v2 token underscore escaped": strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1),
275         } {
276                 s.handler.s3SecretCache = nil
277                 req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
278                 if !c.Check(err, check.IsNil) {
279                         continue
280                 }
281                 secret := trialAuth
282                 if secret[5:12] == "-gj3su-" {
283                         secret = arvadostest.ActiveToken
284                 }
285                 s.sign(c, req, trialAuth, secret)
286                 rec := httptest.NewRecorder()
287                 s.handler.ServeHTTP(rec, req)
288                 if !c.Check(rec.Result().StatusCode, check.Equals, http.StatusOK,
289                         check.Commentf("%s auth did not get 200 OK response: %v", trialName, req)) {
290                         continue
291                 }
292
293                 for name, key := range map[string]string{
294                         "v1 token":   arvadostest.ActiveToken,
295                         "token UUID": arvadostest.ActiveTokenUUID,
296                         "v2 token":   arvadostest.ActiveTokenV2,
297                 } {
298                         actual, ok := s.handler.s3SecretCache[key]
299                         if c.Check(ok, check.Equals, true, check.Commentf("%s not cached from %s", name, trialName)) {
300                                 c.Check(actual.auth.UUID, check.Equals, arvadostest.ActiveTokenUUID)
301                         }
302                 }
303         }
304 }
305
306 func (s *IntegrationSuite) TestS3SecretCacheUsed(c *check.C) {
307         stage := s.s3setup(c)
308         defer stage.teardown(c)
309
310         token := arvadostest.ActiveToken
311         // Step 1: Make a request to get the active token in the cache.
312         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
313         c.Assert(err, check.IsNil)
314         req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
315         s.sign(c, req, token, token)
316         rec := httptest.NewRecorder()
317         s.handler.ServeHTTP(rec, req)
318         resp := rec.Result()
319         c.Assert(resp.StatusCode, check.Equals, http.StatusOK,
320                 check.Commentf("first request did not get 200 OK response"))
321
322         // Step 2: Remove some cache keys our request doesn't rely upon.
323         c.Assert(s.handler.s3SecretCache[arvadostest.ActiveTokenUUID], check.NotNil)
324         delete(s.handler.s3SecretCache, arvadostest.ActiveTokenUUID)
325         c.Assert(s.handler.s3SecretCache[arvadostest.ActiveTokenV2], check.NotNil)
326         delete(s.handler.s3SecretCache, arvadostest.ActiveTokenV2)
327
328         // Step 3: Repeat the original request.
329         rec = httptest.NewRecorder()
330         s.handler.ServeHTTP(rec, req)
331         resp = rec.Result()
332         c.Assert(resp.StatusCode, check.Equals, http.StatusOK,
333                 check.Commentf("cached auth request did not get 200 OK response"))
334
335         // Step 4: Confirm the deleted cache keys were not re-added
336         // (which would imply the authorization was re-requested and cached).
337         c.Check(s.handler.s3SecretCache[arvadostest.ActiveTokenUUID], check.IsNil,
338                 check.Commentf("token UUID re-added to cache after removal"))
339         c.Check(s.handler.s3SecretCache[arvadostest.ActiveTokenV2], check.IsNil,
340                 check.Commentf("v2 token re-added to cache after removal"))
341 }
342
343 func (s *IntegrationSuite) TestS3SecretCacheCleanup(c *check.C) {
344         stage := s.s3setup(c)
345         defer stage.teardown(c)
346         td := -2 * s3SecretCacheTidyInterval
347         startTidied := time.Now().Add(td)
348         s.handler.s3SecretCacheNextTidy = startTidied
349         s.handler.s3SecretCache = make(map[string]*cachedS3Secret)
350         s.handler.s3SecretCache["old"] = &cachedS3Secret{expiry: startTidied.Add(td)}
351
352         reqUrl, err := url.Parse("https://" + stage.collbucket.Name + ".example.com/")
353         c.Assert(err, check.IsNil)
354         req, err := http.NewRequest("GET", reqUrl.String(), bytes.NewReader(nil))
355         token := arvadostest.ActiveToken
356         s.sign(c, req, token, token)
357         rec := httptest.NewRecorder()
358         s.handler.ServeHTTP(rec, req)
359
360         c.Check(s.handler.s3SecretCache["old"], check.IsNil,
361                 check.Commentf("expired token not removed from cache"))
362         c.Check(s.handler.s3SecretCacheNextTidy.After(startTidied), check.Equals, true,
363                 check.Commentf("s3SecretCacheNextTidy not updated"))
364         c.Check(s.handler.s3SecretCache[token], check.NotNil,
365                 check.Commentf("just-used token not found in cache"))
366 }
367
368 func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
369         stage := s.s3setup(c)
370         defer stage.teardown(c)
371
372         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
373                 c.Logf("bucket %s", bucket.Name)
374                 exists, err := bucket.Exists("")
375                 c.Check(err, check.IsNil)
376                 c.Check(exists, check.Equals, true)
377         }
378 }
379
380 func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
381         stage := s.s3setup(c)
382         defer stage.teardown(c)
383         s.testS3GetObject(c, stage.collbucket, "")
384 }
385 func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
386         stage := s.s3setup(c)
387         defer stage.teardown(c)
388         s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
389 }
390 func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
391         rdr, err := bucket.GetReader(prefix + "emptyfile")
392         c.Assert(err, check.IsNil)
393         buf, err := ioutil.ReadAll(rdr)
394         c.Check(err, check.IsNil)
395         c.Check(len(buf), check.Equals, 0)
396         err = rdr.Close()
397         c.Check(err, check.IsNil)
398
399         // GetObject
400         rdr, err = bucket.GetReader(prefix + "missingfile")
401         c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
402         c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
403         c.Check(err, check.ErrorMatches, `The specified key does not exist.`)
404
405         // HeadObject
406         exists, err := bucket.Exists(prefix + "missingfile")
407         c.Check(err, check.IsNil)
408         c.Check(exists, check.Equals, false)
409
410         // GetObject
411         rdr, err = bucket.GetReader(prefix + "sailboat.txt")
412         c.Assert(err, check.IsNil)
413         buf, err = ioutil.ReadAll(rdr)
414         c.Check(err, check.IsNil)
415         c.Check(buf, check.DeepEquals, []byte("⛵\n"))
416         err = rdr.Close()
417         c.Check(err, check.IsNil)
418
419         // HeadObject
420         resp, err := bucket.Head(prefix+"sailboat.txt", nil)
421         c.Check(err, check.IsNil)
422         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
423         c.Check(resp.ContentLength, check.Equals, int64(4))
424
425         // HeadObject with superfluous leading slashes
426         exists, err = bucket.Exists(prefix + "//sailboat.txt")
427         c.Check(err, check.IsNil)
428         c.Check(exists, check.Equals, false)
429 }
430
431 func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
432         got := map[string]string{}
433         for hk, hv := range hdr {
434                 if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
435                         got[k] = hv[0]
436                 }
437         }
438         c.Check(got, check.DeepEquals, expect)
439 }
440
441 func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
442         stage := s.s3setup(c)
443         defer stage.teardown(c)
444
445         expectCollectionTags := map[string]string{
446                 "String":   "string value",
447                 "Array":    `["element1","element2"]`,
448                 "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
449                 "Nonascii": "=?UTF-8?b?4pu1?=",
450                 "Newline":  mime.BEncoding.Encode("UTF-8", "foo\r\nX-Bad: header"),
451         }
452         expectSubprojectTags := map[string]string{
453                 "Subproject_properties_key": "subproject properties value",
454         }
455         expectProjectTags := map[string]string{
456                 "Project-Properties-Key": "project properties value",
457         }
458
459         c.Log("HEAD object with metadata from collection")
460         resp, err := stage.collbucket.Head("sailboat.txt", nil)
461         c.Assert(err, check.IsNil)
462         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
463
464         c.Log("GET object with metadata from collection")
465         rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
466         c.Assert(err, check.IsNil)
467         content, err := ioutil.ReadAll(rdr)
468         c.Check(err, check.IsNil)
469         rdr.Close()
470         c.Check(content, check.HasLen, 4)
471         s.checkMetaEquals(c, hdr, expectCollectionTags)
472         c.Check(hdr["Inject"], check.IsNil)
473
474         c.Log("HEAD bucket with metadata from collection")
475         resp, err = stage.collbucket.Head("/", nil)
476         c.Assert(err, check.IsNil)
477         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
478
479         c.Log("HEAD directory placeholder with metadata from collection")
480         resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
481         c.Assert(err, check.IsNil)
482         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
483
484         c.Log("HEAD file with metadata from collection")
485         resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
486         c.Assert(err, check.IsNil)
487         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
488
489         c.Log("HEAD directory placeholder with metadata from subproject")
490         resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
491         c.Assert(err, check.IsNil)
492         s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
493
494         c.Log("HEAD bucket with metadata from project")
495         resp, err = stage.projbucket.Head("/", nil)
496         c.Assert(err, check.IsNil)
497         s.checkMetaEquals(c, resp.Header, expectProjectTags)
498 }
499
500 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
501         stage := s.s3setup(c)
502         defer stage.teardown(c)
503         s.testS3PutObjectSuccess(c, stage.collbucket, "", stage.coll.UUID)
504 }
505 func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
506         stage := s.s3setup(c)
507         defer stage.teardown(c)
508         s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/", stage.coll.UUID)
509 }
510 func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string, collUUID string) {
511         // We insert a delay between test cases to ensure we exercise
512         // rollover of expired sessions.
513         sleep := time.Second / 100
514         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(sleep * 3)
515
516         for _, trial := range []struct {
517                 path        string
518                 size        int
519                 contentType string
520         }{
521                 {
522                         path:        "newfile",
523                         size:        128000000,
524                         contentType: "application/octet-stream",
525                 }, {
526                         path:        "newdir/newfile",
527                         size:        1 << 26,
528                         contentType: "application/octet-stream",
529                 }, {
530                         path:        "ccc/",
531                         size:        0,
532                         contentType: "application/x-directory",
533                 }, {
534                         path:        "newdir1/newdir2/newfile",
535                         size:        0,
536                         contentType: "application/octet-stream",
537                 }, {
538                         path:        "newdir1/newdir2/newdir3/",
539                         size:        0,
540                         contentType: "application/x-directory",
541                 },
542         } {
543                 time.Sleep(sleep)
544                 c.Logf("=== %v", trial)
545
546                 objname := prefix + trial.path
547
548                 _, err := bucket.GetReader(objname)
549                 if !c.Check(err, check.NotNil) {
550                         continue
551                 }
552                 c.Check(err.(*s3.Error).StatusCode, check.Equals, http.StatusNotFound)
553                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
554                 if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) {
555                         continue
556                 }
557
558                 buf := make([]byte, trial.size)
559                 rand.Read(buf)
560
561                 err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
562                 c.Check(err, check.IsNil)
563
564                 rdr, err := bucket.GetReader(objname)
565                 if strings.HasSuffix(trial.path, "/") && !s.handler.Cluster.Collections.S3FolderObjects {
566                         c.Check(err, check.NotNil)
567                         continue
568                 } else if !c.Check(err, check.IsNil) {
569                         continue
570                 }
571                 buf2, err := ioutil.ReadAll(rdr)
572                 c.Check(err, check.IsNil)
573                 c.Check(buf2, check.HasLen, len(buf))
574                 c.Check(bytes.Equal(buf, buf2), check.Equals, true)
575
576                 // Check that the change is immediately visible via
577                 // (non-S3) webdav request.
578                 _, resp := s.do("GET", "http://"+collUUID+".keep-web.example/"+trial.path, arvadostest.ActiveTokenV2, nil, nil)
579                 c.Check(resp.StatusCode, check.Equals, http.StatusOK)
580                 if !strings.HasSuffix(trial.path, "/") {
581                         buf, _ := io.ReadAll(resp.Body)
582                         c.Check(len(buf), check.Equals, trial.size)
583                 }
584         }
585 }
586
587 func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
588         stage := s.s3setup(c)
589         defer stage.teardown(c)
590         bucket := stage.projbucket
591
592         for _, trial := range []struct {
593                 path        string
594                 size        int
595                 contentType string
596         }{
597                 {
598                         path:        "newfile",
599                         size:        1234,
600                         contentType: "application/octet-stream",
601                 }, {
602                         path:        "newdir/newfile",
603                         size:        1234,
604                         contentType: "application/octet-stream",
605                 }, {
606                         path:        "newdir2/",
607                         size:        0,
608                         contentType: "application/x-directory",
609                 },
610         } {
611                 c.Logf("=== %v", trial)
612
613                 _, err := bucket.GetReader(trial.path)
614                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
615                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
616                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
617
618                 buf := make([]byte, trial.size)
619                 rand.Read(buf)
620
621                 err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
622                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 400)
623                 c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`)
624                 c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid (argument|operation)`)
625
626                 _, err = bucket.GetReader(trial.path)
627                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
628                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
629                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
630         }
631 }
632
633 func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
634         stage := s.s3setup(c)
635         defer stage.teardown(c)
636         s.testS3DeleteObject(c, stage.collbucket, "")
637 }
638 func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
639         stage := s.s3setup(c)
640         defer stage.teardown(c)
641         s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
642 }
643 func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
644         s.handler.Cluster.Collections.S3FolderObjects = true
645         for _, trial := range []struct {
646                 path string
647         }{
648                 {"/"},
649                 {"nonexistentfile"},
650                 {"emptyfile"},
651                 {"sailboat.txt"},
652                 {"sailboat.txt/"},
653                 {"emptydir"},
654                 {"emptydir/"},
655         } {
656                 objname := prefix + trial.path
657                 comment := check.Commentf("objname %q", objname)
658
659                 err := bucket.Del(objname)
660                 if trial.path == "/" {
661                         c.Check(err, check.NotNil)
662                         continue
663                 }
664                 c.Check(err, check.IsNil, comment)
665                 _, err = bucket.GetReader(objname)
666                 c.Check(err, check.NotNil, comment)
667         }
668 }
669
670 func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
671         stage := s.s3setup(c)
672         defer stage.teardown(c)
673         s.testS3PutObjectFailure(c, stage.collbucket, "")
674 }
675 func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
676         stage := s.s3setup(c)
677         defer stage.teardown(c)
678         s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
679 }
680 func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
681         s.handler.Cluster.Collections.S3FolderObjects = false
682
683         var wg sync.WaitGroup
684         for _, trial := range []struct {
685                 path string
686         }{
687                 {
688                         path: "emptyfile/newname", // emptyfile exists, see s3setup()
689                 }, {
690                         path: "emptyfile/", // emptyfile exists, see s3setup()
691                 }, {
692                         path: "emptydir", // dir already exists, see s3setup()
693                 }, {
694                         path: "emptydir/",
695                 }, {
696                         path: "newdir/",
697                 }, {
698                         path: "",
699                 },
700         } {
701                 trial := trial
702                 wg.Add(1)
703                 go func() {
704                         defer wg.Done()
705                         c.Logf("=== %v", trial)
706
707                         objname := prefix + trial.path
708
709                         buf := make([]byte, 1234)
710                         rand.Read(buf)
711
712                         err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
713                         if !c.Check(err, check.ErrorMatches, `(invalid object name.*|open ".*" failed.*|object name conflicts with existing object|Missing object name in PUT request.)`, check.Commentf("PUT %q should fail", objname)) {
714                                 return
715                         }
716
717                         if objname != "" && objname != "/" {
718                                 _, err = bucket.GetReader(objname)
719                                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
720                                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
721                                 c.Check(err, check.ErrorMatches, `The specified key does not exist.`, check.Commentf("GET %q should return 404", objname))
722                         }
723                 }()
724         }
725         wg.Wait()
726 }
727
728 func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
729         fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
730         c.Assert(err, check.IsNil)
731         for d := 0; d < dirs; d++ {
732                 dir := fmt.Sprintf("dir%d", d)
733                 c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
734                 for i := 0; i < filesPerDir; i++ {
735                         f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
736                         c.Assert(err, check.IsNil)
737                         c.Assert(f.Close(), check.IsNil)
738                 }
739         }
740         c.Assert(fs.Sync(), check.IsNil)
741 }
742
743 func (s *IntegrationSuite) sign(c *check.C, req *http.Request, key, secret string) {
744         scope := "20200202/zzzzz/service/aws4_request"
745         signedHeaders := "date"
746         req.Header.Set("Date", time.Now().UTC().Format(time.RFC1123))
747         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, req)
748         c.Assert(err, check.IsNil)
749         sig, err := s3signature(secret, scope, signedHeaders, stringToSign)
750         c.Assert(err, check.IsNil)
751         req.Header.Set("Authorization", s3SignAlgorithm+" Credential="+key+"/"+scope+", SignedHeaders="+signedHeaders+", Signature="+sig)
752 }
753
754 func (s *IntegrationSuite) TestS3VirtualHostStyleRequests(c *check.C) {
755         stage := s.s3setup(c)
756         defer stage.teardown(c)
757         for _, trial := range []struct {
758                 url            string
759                 method         string
760                 body           string
761                 responseCode   int
762                 responseRegexp []string
763                 checkEtag      bool
764         }{
765                 {
766                         url:            "https://" + stage.collbucket.Name + ".example.com/",
767                         method:         "GET",
768                         responseCode:   http.StatusOK,
769                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
770                 },
771                 {
772                         url:            "https://" + strings.Replace(stage.coll.PortableDataHash, "+", "-", -1) + ".example.com/",
773                         method:         "GET",
774                         responseCode:   http.StatusOK,
775                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
776                 },
777                 {
778                         url:            "https://" + stage.projbucket.Name + ".example.com/?prefix=" + stage.coll.Name + "/&delimiter=/",
779                         method:         "GET",
780                         responseCode:   http.StatusOK,
781                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
782                 },
783                 {
784                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/sailboat.txt",
785                         method:         "GET",
786                         responseCode:   http.StatusOK,
787                         responseRegexp: []string{`⛵\n`},
788                         checkEtag:      true,
789                 },
790                 {
791                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
792                         method:       "PUT",
793                         body:         "boop",
794                         responseCode: http.StatusOK,
795                 },
796                 {
797                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
798                         method:         "GET",
799                         responseCode:   http.StatusOK,
800                         responseRegexp: []string{`boop`},
801                         checkEtag:      true,
802                 },
803                 {
804                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
805                         method:       "GET",
806                         responseCode: http.StatusNotFound,
807                 },
808                 {
809                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
810                         method:       "PUT",
811                         body:         "boop",
812                         responseCode: http.StatusOK,
813                 },
814                 {
815                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
816                         method:         "GET",
817                         responseCode:   http.StatusOK,
818                         responseRegexp: []string{`boop`},
819                         checkEtag:      true,
820                 },
821         } {
822                 url, err := url.Parse(trial.url)
823                 c.Assert(err, check.IsNil)
824                 req, err := http.NewRequest(trial.method, url.String(), bytes.NewReader([]byte(trial.body)))
825                 c.Assert(err, check.IsNil)
826                 s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken)
827                 rr := httptest.NewRecorder()
828                 s.handler.ServeHTTP(rr, req)
829                 resp := rr.Result()
830                 c.Check(resp.StatusCode, check.Equals, trial.responseCode)
831                 body, err := ioutil.ReadAll(resp.Body)
832                 c.Assert(err, check.IsNil)
833                 for _, re := range trial.responseRegexp {
834                         c.Check(string(body), check.Matches, re)
835                 }
836                 if trial.checkEtag {
837                         c.Check(resp.Header.Get("Etag"), check.Matches, `"[\da-f]{32}\+\d+"`)
838                 }
839         }
840 }
841
842 func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) {
843         stage := s.s3setup(c)
844         defer stage.teardown(c)
845         for _, trial := range []struct {
846                 rawPath        string
847                 normalizedPath string
848         }{
849                 {"/foo", "/foo"},                           // boring case
850                 {"/foo%5fbar", "/foo_bar"},                 // _ must not be escaped
851                 {"/foo%2fbar", "/foo/bar"},                 // / must not be escaped
852                 {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
853                 {"/foo%5bbar", "/foo%5Bbar"},               // %XX must be uppercase
854                 // unicode chars must be UTF-8 encoded and escaped
855                 {"/\u26f5", "/%E2%9B%B5"},
856                 // "//" and "///" must not be squashed -- see example,
857                 // https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
858                 {"//foo///.bar", "//foo///.bar"},
859         } {
860                 c.Logf("trial %q", trial)
861
862                 date := time.Now().UTC().Format("20060102T150405Z")
863                 scope := "20200202/zzzzz/S3/aws4_request"
864                 canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
865                 c.Logf("canonicalRequest %q", canonicalRequest)
866                 expect := fmt.Sprintf("%s\n%s\n%s\n%s", s3SignAlgorithm, date, scope, hashdigest(sha256.New(), canonicalRequest))
867                 c.Logf("expected stringToSign %q", expect)
868
869                 req, err := http.NewRequest("GET", "https://host.example.com"+trial.rawPath, nil)
870                 req.Header.Set("X-Amz-Date", date)
871                 req.Host = "host.example.com"
872                 c.Assert(err, check.IsNil)
873
874                 obtained, err := s3stringToSign(s3SignAlgorithm, scope, "host", req)
875                 if !c.Check(err, check.IsNil) {
876                         continue
877                 }
878                 c.Check(obtained, check.Equals, expect)
879         }
880 }
881
882 func (s *IntegrationSuite) TestS3GetBucketLocation(c *check.C) {
883         stage := s.s3setup(c)
884         defer stage.teardown(c)
885         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
886                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
887                 c.Check(err, check.IsNil)
888                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
889                 req.URL.RawQuery = "location"
890                 resp, err := http.DefaultClient.Do(req)
891                 c.Assert(err, check.IsNil)
892                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
893                 buf, err := ioutil.ReadAll(resp.Body)
894                 c.Assert(err, check.IsNil)
895                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<LocationConstraint><LocationConstraint xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">zzzzz</LocationConstraint></LocationConstraint>\n")
896         }
897 }
898
899 func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
900         stage := s.s3setup(c)
901         defer stage.teardown(c)
902         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
903                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
904                 c.Check(err, check.IsNil)
905                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
906                 req.URL.RawQuery = "versioning"
907                 resp, err := http.DefaultClient.Do(req)
908                 c.Assert(err, check.IsNil)
909                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
910                 buf, err := ioutil.ReadAll(resp.Body)
911                 c.Assert(err, check.IsNil)
912                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
913         }
914 }
915
916 func (s *IntegrationSuite) TestS3UnsupportedAPIs(c *check.C) {
917         stage := s.s3setup(c)
918         defer stage.teardown(c)
919         for _, trial := range []struct {
920                 method   string
921                 path     string
922                 rawquery string
923         }{
924                 {"GET", "/", "acl&versionId=1234"},    // GetBucketAcl
925                 {"GET", "/foo", "acl&versionId=1234"}, // GetObjectAcl
926                 {"PUT", "/", "acl"},                   // PutBucketAcl
927                 {"PUT", "/foo", "acl"},                // PutObjectAcl
928                 {"DELETE", "/", "tagging"},            // DeleteBucketTagging
929                 {"DELETE", "/foo", "tagging"},         // DeleteObjectTagging
930         } {
931                 for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
932                         c.Logf("trial %v bucket %v", trial, bucket)
933                         req, err := http.NewRequest(trial.method, bucket.URL(trial.path), nil)
934                         c.Check(err, check.IsNil)
935                         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
936                         req.URL.RawQuery = trial.rawquery
937                         resp, err := http.DefaultClient.Do(req)
938                         c.Assert(err, check.IsNil)
939                         c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
940                         buf, err := ioutil.ReadAll(resp.Body)
941                         c.Assert(err, check.IsNil)
942                         c.Check(string(buf), check.Matches, "(?ms).*InvalidRequest.*API not supported.*")
943                 }
944         }
945 }
946
947 // If there are no CommonPrefixes entries, the CommonPrefixes XML tag
948 // should not appear at all.
949 func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) {
950         stage := s.s3setup(c)
951         defer stage.teardown(c)
952
953         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
954         c.Assert(err, check.IsNil)
955         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
956         req.URL.RawQuery = "prefix=asdfasdfasdf&delimiter=/"
957         resp, err := http.DefaultClient.Do(req)
958         c.Assert(err, check.IsNil)
959         buf, err := ioutil.ReadAll(resp.Body)
960         c.Assert(err, check.IsNil)
961         c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`)
962 }
963
964 // If there is no delimiter in the request, or the results are not
965 // truncated, the NextMarker XML tag should not appear in the response
966 // body.
967 func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) {
968         stage := s.s3setup(c)
969         defer stage.teardown(c)
970
971         for _, query := range []string{"prefix=e&delimiter=/", ""} {
972                 req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
973                 c.Assert(err, check.IsNil)
974                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
975                 req.URL.RawQuery = query
976                 resp, err := http.DefaultClient.Do(req)
977                 c.Assert(err, check.IsNil)
978                 buf, err := ioutil.ReadAll(resp.Body)
979                 c.Assert(err, check.IsNil)
980                 c.Check(string(buf), check.Not(check.Matches), `(?ms).*NextMarker.*`)
981         }
982 }
983
984 // List response should include KeyCount field.
985 func (s *IntegrationSuite) TestS3ListKeyCount(c *check.C) {
986         stage := s.s3setup(c)
987         defer stage.teardown(c)
988
989         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
990         c.Assert(err, check.IsNil)
991         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
992         req.URL.RawQuery = "prefix=&delimiter=/"
993         resp, err := http.DefaultClient.Do(req)
994         c.Assert(err, check.IsNil)
995         buf, err := ioutil.ReadAll(resp.Body)
996         c.Assert(err, check.IsNil)
997         c.Check(string(buf), check.Matches, `(?ms).*<KeyCount>2</KeyCount>.*`)
998 }
999
1000 func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
1001         stage := s.s3setup(c)
1002         defer stage.teardown(c)
1003
1004         var markers int
1005         for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
1006                 dirs := 2000
1007                 filesPerDir := 2
1008                 stage.writeBigDirs(c, dirs, filesPerDir)
1009                 // Total # objects is:
1010                 //                 2 file entries from s3setup (emptyfile and sailboat.txt)
1011                 //                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
1012                 //             +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled)
1013                 // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
1014                 s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
1015                 s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
1016                 s.testS3List(c, stage.collbucket, "", 51, markers+2+(filesPerDir+markers)*dirs)
1017                 s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
1018         }
1019 }
1020 func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
1021         c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.handler.Cluster.Collections.S3FolderObjects)
1022         expectPageSize := pageSize
1023         if expectPageSize > 1000 {
1024                 expectPageSize = 1000
1025         }
1026         gotKeys := map[string]s3.Key{}
1027         nextMarker := ""
1028         pages := 0
1029         for {
1030                 resp, err := bucket.List(prefix, "", nextMarker, pageSize)
1031                 if !c.Check(err, check.IsNil) {
1032                         break
1033                 }
1034                 c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
1035                 if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
1036                         break
1037                 }
1038                 for _, key := range resp.Contents {
1039                         if _, dup := gotKeys[key.Key]; dup {
1040                                 c.Errorf("got duplicate key %q on page %d", key.Key, pages)
1041                         }
1042                         gotKeys[key.Key] = key
1043                         if strings.Contains(key.Key, "sailboat.txt") {
1044                                 c.Check(key.Size, check.Equals, int64(4))
1045                         }
1046                 }
1047                 if !resp.IsTruncated {
1048                         c.Check(resp.NextMarker, check.Equals, "")
1049                         break
1050                 }
1051                 if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
1052                         break
1053                 }
1054                 nextMarker = resp.NextMarker
1055         }
1056         if !c.Check(len(gotKeys), check.Equals, expectFiles) {
1057                 var sorted []string
1058                 for k := range gotKeys {
1059                         sorted = append(sorted, k)
1060                 }
1061                 sort.Strings(sorted)
1062                 for _, k := range sorted {
1063                         c.Logf("got %s", k)
1064                 }
1065         }
1066 }
1067
1068 func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
1069         for _, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
1070                 s.testS3CollectionListRollup(c)
1071         }
1072 }
1073
1074 func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
1075         stage := s.s3setup(c)
1076         defer stage.teardown(c)
1077
1078         dirs := 2
1079         filesPerDir := 500
1080         stage.writeBigDirs(c, dirs, filesPerDir)
1081         err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
1082         c.Assert(err, check.IsNil)
1083         var allfiles []string
1084         for marker := ""; ; {
1085                 resp, err := stage.collbucket.List("", "", marker, 20000)
1086                 c.Check(err, check.IsNil)
1087                 for _, key := range resp.Contents {
1088                         if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
1089                                 allfiles = append(allfiles, key.Key)
1090                         }
1091                 }
1092                 marker = resp.NextMarker
1093                 if marker == "" {
1094                         break
1095                 }
1096         }
1097         markers := 0
1098         if s.handler.Cluster.Collections.S3FolderObjects {
1099                 markers = 1
1100         }
1101         c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
1102
1103         gotDirMarker := map[string]bool{}
1104         for _, name := range allfiles {
1105                 isDirMarker := strings.HasSuffix(name, "/")
1106                 if markers == 0 {
1107                         c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
1108                 } else if isDirMarker {
1109                         gotDirMarker[name] = true
1110                 } else if i := strings.LastIndex(name, "/"); i >= 0 {
1111                         c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
1112                         gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
1113                 }
1114         }
1115
1116         for _, trial := range []struct {
1117                 prefix    string
1118                 delimiter string
1119                 marker    string
1120         }{
1121                 {"", "", ""},
1122                 {"di", "/", ""},
1123                 {"di", "r", ""},
1124                 {"di", "n", ""},
1125                 {"dir0", "/", ""},
1126                 {"dir0/", "/", ""},
1127                 {"dir0/f", "/", ""},
1128                 {"dir0", "", ""},
1129                 {"dir0/", "", ""},
1130                 {"dir0/f", "", ""},
1131                 {"dir0", "/", "dir0/file14.txt"},       // one commonprefix, "dir0/"
1132                 {"dir0", "/", "dir0/zzzzfile.txt"},     // no commonprefixes
1133                 {"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
1134                 {"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
1135                 {"", "", "dir1/file498.txt"},           // last page of results
1136                 {"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
1137                 {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
1138                 {"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
1139                 {"dir2", "/", ""},                      // prefix "dir2" does not exist
1140                 {"", "/", ""},
1141         } {
1142                 c.Logf("\n\n=== trial %+v markers=%d", trial, markers)
1143
1144                 maxKeys := 20
1145                 resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
1146                 c.Check(err, check.IsNil)
1147                 if resp.IsTruncated && trial.delimiter == "" {
1148                         // goamz List method fills in the missing
1149                         // NextMarker field if resp.IsTruncated, so
1150                         // now we can't really tell whether it was
1151                         // sent by the server or by goamz. In cases
1152                         // where it should be empty but isn't, assume
1153                         // it's goamz's fault.
1154                         resp.NextMarker = ""
1155                 }
1156
1157                 var expectKeys []string
1158                 var expectPrefixes []string
1159                 var expectNextMarker string
1160                 var expectTruncated bool
1161                 for _, key := range allfiles {
1162                         full := len(expectKeys)+len(expectPrefixes) >= maxKeys
1163                         if !strings.HasPrefix(key, trial.prefix) || key <= trial.marker {
1164                                 continue
1165                         } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
1166                                 prefix := key[:len(trial.prefix)+idx+1]
1167                                 if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
1168                                         // same prefix as previous key
1169                                 } else if full {
1170                                         expectTruncated = true
1171                                 } else {
1172                                         expectPrefixes = append(expectPrefixes, prefix)
1173                                         expectNextMarker = prefix
1174                                 }
1175                         } else if full {
1176                                 expectTruncated = true
1177                                 break
1178                         } else {
1179                                 expectKeys = append(expectKeys, key)
1180                                 if trial.delimiter != "" {
1181                                         expectNextMarker = key
1182                                 }
1183                         }
1184                 }
1185                 if !expectTruncated {
1186                         expectNextMarker = ""
1187                 }
1188
1189                 var gotKeys []string
1190                 for _, key := range resp.Contents {
1191                         gotKeys = append(gotKeys, key.Key)
1192                 }
1193                 var gotPrefixes []string
1194                 for _, prefix := range resp.CommonPrefixes {
1195                         gotPrefixes = append(gotPrefixes, prefix)
1196                 }
1197                 commentf := check.Commentf("trial %+v markers=%d", trial, markers)
1198                 c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
1199                 c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
1200                 c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
1201                 c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
1202                 c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
1203         }
1204 }
1205
1206 func (s *IntegrationSuite) TestS3ListObjectsV2ManySubprojects(c *check.C) {
1207         stage := s.s3setup(c)
1208         defer stage.teardown(c)
1209         projects := 50
1210         collectionsPerProject := 2
1211         for i := 0; i < projects; i++ {
1212                 var subproj arvados.Group
1213                 err := stage.arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
1214                         "group": map[string]interface{}{
1215                                 "owner_uuid":  stage.subproj.UUID,
1216                                 "group_class": "project",
1217                                 "name":        fmt.Sprintf("keep-web s3 test subproject %d", i),
1218                         },
1219                 })
1220                 c.Assert(err, check.IsNil)
1221                 for j := 0; j < collectionsPerProject; j++ {
1222                         err = stage.arv.RequestAndDecode(nil, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
1223                                 "owner_uuid":    subproj.UUID,
1224                                 "name":          fmt.Sprintf("keep-web s3 test collection %d", j),
1225                                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
1226                         }})
1227                         c.Assert(err, check.IsNil)
1228                 }
1229         }
1230         c.Logf("setup complete")
1231
1232         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1233                 Region:           aws_aws.String("auto"),
1234                 Endpoint:         aws_aws.String(s.testServer.URL),
1235                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1236                 S3ForcePathStyle: aws_aws.Bool(true),
1237         }))
1238         client := aws_s3.New(sess)
1239         ctx := context.Background()
1240         params := aws_s3.ListObjectsV2Input{
1241                 Bucket:    aws_aws.String(stage.proj.UUID),
1242                 Delimiter: aws_aws.String("/"),
1243                 Prefix:    aws_aws.String("keep-web s3 test subproject/"),
1244                 MaxKeys:   aws_aws.Int64(int64(projects / 2)),
1245         }
1246         for page := 1; ; page++ {
1247                 t0 := time.Now()
1248                 result, err := client.ListObjectsV2WithContext(ctx, &params)
1249                 if !c.Check(err, check.IsNil) {
1250                         break
1251                 }
1252                 c.Logf("got page %d in %v with len(Contents) == %d, len(CommonPrefixes) == %d", page, time.Since(t0), len(result.Contents), len(result.CommonPrefixes))
1253                 if !*result.IsTruncated {
1254                         break
1255                 }
1256                 params.ContinuationToken = result.NextContinuationToken
1257                 *params.MaxKeys = *params.MaxKeys/2 + 1
1258         }
1259 }
1260
1261 func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) {
1262         stage := s.s3setup(c)
1263         defer stage.teardown(c)
1264         dirs := 2
1265         filesPerDir := 40
1266         stage.writeBigDirs(c, dirs, filesPerDir)
1267
1268         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1269                 Region:           aws_aws.String("auto"),
1270                 Endpoint:         aws_aws.String(s.testServer.URL),
1271                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1272                 S3ForcePathStyle: aws_aws.Bool(true),
1273         }))
1274
1275         stringOrNil := func(s string) *string {
1276                 if s == "" {
1277                         return nil
1278                 } else {
1279                         return &s
1280                 }
1281         }
1282
1283         client := aws_s3.New(sess)
1284         ctx := context.Background()
1285
1286         for _, trial := range []struct {
1287                 prefix               string
1288                 delimiter            string
1289                 startAfter           string
1290                 maxKeys              int
1291                 expectKeys           int
1292                 expectCommonPrefixes map[string]bool
1293         }{
1294                 {
1295                         // Expect {filesPerDir plus the dir itself}
1296                         // for each dir, plus emptydir, emptyfile, and
1297                         // sailboat.txt.
1298                         expectKeys: (filesPerDir+1)*dirs + 3,
1299                 },
1300                 {
1301                         maxKeys:    15,
1302                         expectKeys: (filesPerDir+1)*dirs + 3,
1303                 },
1304                 {
1305                         startAfter: "dir0/z",
1306                         maxKeys:    15,
1307                         // Expect {filesPerDir plus the dir itself}
1308                         // for each dir except dir0, plus emptydir,
1309                         // emptyfile, and sailboat.txt.
1310                         expectKeys: (filesPerDir+1)*(dirs-1) + 3,
1311                 },
1312                 {
1313                         maxKeys:              1,
1314                         delimiter:            "/",
1315                         expectKeys:           2, // emptyfile, sailboat.txt
1316                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1317                 },
1318                 {
1319                         startAfter:           "dir0/z",
1320                         maxKeys:              15,
1321                         delimiter:            "/",
1322                         expectKeys:           2, // emptyfile, sailboat.txt
1323                         expectCommonPrefixes: map[string]bool{"dir1/": true, "emptydir/": true},
1324                 },
1325                 {
1326                         startAfter:           "dir0/file10.txt",
1327                         maxKeys:              15,
1328                         delimiter:            "/",
1329                         expectKeys:           2,
1330                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1331                 },
1332                 {
1333                         startAfter:           "dir0/file10.txt",
1334                         maxKeys:              15,
1335                         prefix:               "d",
1336                         delimiter:            "/",
1337                         expectKeys:           0,
1338                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true},
1339                 },
1340         } {
1341                 c.Logf("[trial %+v]", trial)
1342                 params := aws_s3.ListObjectsV2Input{
1343                         Bucket:     aws_aws.String(stage.collbucket.Name),
1344                         Prefix:     stringOrNil(trial.prefix),
1345                         Delimiter:  stringOrNil(trial.delimiter),
1346                         StartAfter: stringOrNil(trial.startAfter),
1347                         MaxKeys:    aws_aws.Int64(int64(trial.maxKeys)),
1348                 }
1349                 keySeen := map[string]bool{}
1350                 prefixSeen := map[string]bool{}
1351                 for {
1352                         result, err := client.ListObjectsV2WithContext(ctx, &params)
1353                         if !c.Check(err, check.IsNil) {
1354                                 break
1355                         }
1356                         c.Check(result.Name, check.DeepEquals, aws_aws.String(stage.collbucket.Name))
1357                         c.Check(result.Prefix, check.DeepEquals, aws_aws.String(trial.prefix))
1358                         c.Check(result.Delimiter, check.DeepEquals, aws_aws.String(trial.delimiter))
1359                         // The following two fields are expected to be
1360                         // nil (i.e., no tag in XML response) rather
1361                         // than "" when the corresponding request
1362                         // field was empty or nil.
1363                         c.Check(result.StartAfter, check.DeepEquals, stringOrNil(trial.startAfter))
1364                         c.Check(result.ContinuationToken, check.DeepEquals, params.ContinuationToken)
1365
1366                         if trial.maxKeys > 0 {
1367                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(trial.maxKeys)))
1368                                 c.Check(len(result.Contents)+len(result.CommonPrefixes) <= trial.maxKeys, check.Equals, true)
1369                         } else {
1370                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(s3MaxKeys)))
1371                         }
1372
1373                         for _, ent := range result.Contents {
1374                                 c.Assert(ent.Key, check.NotNil)
1375                                 c.Check(*ent.Key > trial.startAfter, check.Equals, true)
1376                                 c.Check(keySeen[*ent.Key], check.Equals, false, check.Commentf("dup key %q", *ent.Key))
1377                                 keySeen[*ent.Key] = true
1378                         }
1379                         for _, ent := range result.CommonPrefixes {
1380                                 c.Assert(ent.Prefix, check.NotNil)
1381                                 c.Check(strings.HasSuffix(*ent.Prefix, trial.delimiter), check.Equals, true, check.Commentf("bad CommonPrefix %q", *ent.Prefix))
1382                                 if strings.HasPrefix(trial.startAfter, *ent.Prefix) {
1383                                         // If we asked for
1384                                         // startAfter=dir0/file10.txt,
1385                                         // we expect dir0/ to be
1386                                         // returned as a common prefix
1387                                 } else {
1388                                         c.Check(*ent.Prefix > trial.startAfter, check.Equals, true)
1389                                 }
1390                                 c.Check(prefixSeen[*ent.Prefix], check.Equals, false, check.Commentf("dup common prefix %q", *ent.Prefix))
1391                                 prefixSeen[*ent.Prefix] = true
1392                         }
1393                         if *result.IsTruncated && c.Check(result.NextContinuationToken, check.Not(check.Equals), "") {
1394                                 params.ContinuationToken = aws_aws.String(*result.NextContinuationToken)
1395                         } else {
1396                                 break
1397                         }
1398                 }
1399                 c.Check(keySeen, check.HasLen, trial.expectKeys)
1400                 c.Check(prefixSeen, check.HasLen, len(trial.expectCommonPrefixes))
1401                 if len(trial.expectCommonPrefixes) > 0 {
1402                         c.Check(prefixSeen, check.DeepEquals, trial.expectCommonPrefixes)
1403                 }
1404         }
1405 }
1406
1407 func (s *IntegrationSuite) TestS3ListObjectsV2EncodingTypeURL(c *check.C) {
1408         stage := s.s3setup(c)
1409         defer stage.teardown(c)
1410         dirs := 2
1411         filesPerDir := 40
1412         stage.writeBigDirs(c, dirs, filesPerDir)
1413
1414         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1415                 Region:           aws_aws.String("auto"),
1416                 Endpoint:         aws_aws.String(s.testServer.URL),
1417                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1418                 S3ForcePathStyle: aws_aws.Bool(true),
1419         }))
1420
1421         client := aws_s3.New(sess)
1422         ctx := context.Background()
1423
1424         result, err := client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1425                 Bucket:       aws_aws.String(stage.collbucket.Name),
1426                 Prefix:       aws_aws.String("dir0/"),
1427                 Delimiter:    aws_aws.String("/"),
1428                 StartAfter:   aws_aws.String("dir0/"),
1429                 EncodingType: aws_aws.String("url"),
1430         })
1431         c.Assert(err, check.IsNil)
1432         c.Check(*result.Prefix, check.Equals, "dir0%2F")
1433         c.Check(*result.Delimiter, check.Equals, "%2F")
1434         c.Check(*result.StartAfter, check.Equals, "dir0%2F")
1435         for _, ent := range result.Contents {
1436                 c.Check(*ent.Key, check.Matches, "dir0%2F.*")
1437         }
1438         result, err = client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1439                 Bucket:       aws_aws.String(stage.collbucket.Name),
1440                 Delimiter:    aws_aws.String("/"),
1441                 EncodingType: aws_aws.String("url"),
1442         })
1443         c.Assert(err, check.IsNil)
1444         c.Check(*result.Delimiter, check.Equals, "%2F")
1445         c.Check(result.CommonPrefixes, check.HasLen, dirs+1)
1446         for _, ent := range result.CommonPrefixes {
1447                 c.Check(*ent.Prefix, check.Matches, ".*%2F")
1448         }
1449 }
1450
1451 // TestS3cmd checks compatibility with the s3cmd command line tool, if
1452 // it's installed (run-tests normally takes care of that).
1453 func (s *IntegrationSuite) TestS3cmd(c *check.C) {
1454         if _, err := exec.LookPath("s3cmd"); err != nil {
1455                 c.Skip("s3cmd not found")
1456                 return
1457         }
1458
1459         stage := s.s3setup(c)
1460         defer stage.teardown(c)
1461
1462         cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection)
1463         buf, err := cmd.CombinedOutput()
1464         c.Check(err, check.IsNil)
1465         c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
1466
1467         // This tests whether s3cmd's path normalization agrees with
1468         // keep-web's signature verification wrt chars like "|"
1469         // (neither reserved nor unreserved) and "," (not normally
1470         // percent-encoded in a path).
1471         tmpfile := c.MkDir() + "/dstfile"
1472         cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar", tmpfile)
1473         buf, err = cmd.CombinedOutput()
1474         c.Check(err, check.NotNil)
1475         // As of commit b7520e5c25e1bf25c1a8bf5aa2eadb299be8f606
1476         // (between debian bullseye and bookworm versions), s3cmd
1477         // started catching the NoSuchKey error code and replacing it
1478         // with "Source object '%s' does not exist.".
1479         c.Check(string(buf), check.Matches, `(?ms).*(NoSuchKey|Source object.*does not exist).*\n`)
1480
1481         tmpfile = c.MkDir() + "/foo"
1482         cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo", tmpfile)
1483         buf, err = cmd.CombinedOutput()
1484         c.Logf("%s", buf)
1485         if c.Check(err, check.IsNil) {
1486                 checkcontent, err := os.ReadFile(tmpfile)
1487                 c.Check(err, check.IsNil)
1488                 c.Check(string(checkcontent), check.Equals, "foo")
1489         }
1490 }
1491
1492 func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
1493         stage := s.s3setup(c)
1494         defer stage.teardown(c)
1495
1496         hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt")
1497         c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
1498         c.Check(body, check.Equals, "⛵\n")
1499 }