Update test failing on debian 12 if s3cmd is installed.
[arvados.git] / services / keep-web / s3_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "crypto/rand"
11         "crypto/sha256"
12         "fmt"
13         "io/ioutil"
14         "mime"
15         "net/http"
16         "net/http/httptest"
17         "net/url"
18         "os"
19         "os/exec"
20         "sort"
21         "strings"
22         "sync"
23         "time"
24
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/arvadostest"
28         "git.arvados.org/arvados.git/sdk/go/keepclient"
29         "github.com/AdRoll/goamz/aws"
30         "github.com/AdRoll/goamz/s3"
31         aws_aws "github.com/aws/aws-sdk-go/aws"
32         aws_credentials "github.com/aws/aws-sdk-go/aws/credentials"
33         aws_session "github.com/aws/aws-sdk-go/aws/session"
34         aws_s3 "github.com/aws/aws-sdk-go/service/s3"
35         check "gopkg.in/check.v1"
36 )
37
// s3stage bundles the fixtures created by s3setup: the API clients
// used to build them, the project/subproject/collection records, and
// S3 bucket handles addressing the project and the collection.
type s3stage struct {
        // arv is the API client, authenticated with the active
        // user's token.
        arv        *arvados.Client
        // ac is the arvadosclient wrapper built from arv.
        ac         *arvadosclient.ArvadosClient
        // kc is the Keep client built from ac.
        kc         *keepclient.KeepClient
        // proj is the top-level test project.
        proj       arvados.Group
        // projbucket is the S3 bucket named after proj.UUID.
        projbucket *s3.Bucket
        // subproj is a project owned by proj.
        subproj    arvados.Group
        // coll is the test collection owned by proj.
        coll       arvados.Collection
        // collbucket is the S3 bucket named after coll.UUID.
        collbucket *s3.Bucket
}
48
49 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
50         var proj, subproj arvados.Group
51         var coll arvados.Collection
52         arv := arvados.NewClientFromEnv()
53         arv.AuthToken = arvadostest.ActiveToken
54         err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
55                 "group": map[string]interface{}{
56                         "group_class": "project",
57                         "name":        "keep-web s3 test",
58                         "properties": map[string]interface{}{
59                                 "project-properties-key": "project properties value",
60                         },
61                 },
62                 "ensure_unique_name": true,
63         })
64         c.Assert(err, check.IsNil)
65         err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
66                 "group": map[string]interface{}{
67                         "owner_uuid":  proj.UUID,
68                         "group_class": "project",
69                         "name":        "keep-web s3 test subproject",
70                         "properties": map[string]interface{}{
71                                 "subproject_properties_key": "subproject properties value",
72                                 "invalid header key":        "this value will not be returned because key contains spaces",
73                         },
74                 },
75         })
76         c.Assert(err, check.IsNil)
77         err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
78                 "owner_uuid":    proj.UUID,
79                 "name":          "keep-web s3 test collection",
80                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
81                 "properties": map[string]interface{}{
82                         "string":   "string value",
83                         "array":    []string{"element1", "element2"},
84                         "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
85                         "nonascii": "⛵",
86                         "newline":  "foo\r\nX-Bad: header",
87                         // This key cannot be expressed as a MIME
88                         // header key, so it will be silently skipped
89                         // (see "Inject" in PropertiesAsMetadata test)
90                         "a: a\r\nInject": "bogus",
91                 },
92         }})
93         c.Assert(err, check.IsNil)
94         ac, err := arvadosclient.New(arv)
95         c.Assert(err, check.IsNil)
96         kc, err := keepclient.MakeKeepClient(ac)
97         c.Assert(err, check.IsNil)
98         fs, err := coll.FileSystem(arv, kc)
99         c.Assert(err, check.IsNil)
100         f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
101         c.Assert(err, check.IsNil)
102         _, err = f.Write([]byte("⛵\n"))
103         c.Assert(err, check.IsNil)
104         err = f.Close()
105         c.Assert(err, check.IsNil)
106         err = fs.Sync()
107         c.Assert(err, check.IsNil)
108         err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
109         c.Assert(err, check.IsNil)
110
111         auth := aws.NewAuth(arvadostest.ActiveTokenUUID, arvadostest.ActiveToken, "", time.Now().Add(time.Hour))
112         region := aws.Region{
113                 Name:       "zzzzz",
114                 S3Endpoint: s.testServer.URL,
115         }
116         client := s3.New(*auth, region)
117         client.Signature = aws.V4Signature
118         return s3stage{
119                 arv:  arv,
120                 ac:   ac,
121                 kc:   kc,
122                 proj: proj,
123                 projbucket: &s3.Bucket{
124                         S3:   client,
125                         Name: proj.UUID,
126                 },
127                 subproj: subproj,
128                 coll:    coll,
129                 collbucket: &s3.Bucket{
130                         S3:   client,
131                         Name: coll.UUID,
132                 },
133         }
134 }
135
136 func (stage s3stage) teardown(c *check.C) {
137         if stage.coll.UUID != "" {
138                 err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
139                 c.Check(err, check.IsNil)
140         }
141         if stage.proj.UUID != "" {
142                 err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
143                 c.Check(err, check.IsNil)
144         }
145 }
146
147 func (s *IntegrationSuite) TestS3Signatures(c *check.C) {
148         stage := s.s3setup(c)
149         defer stage.teardown(c)
150
151         bucket := stage.collbucket
152         for _, trial := range []struct {
153                 success   bool
154                 signature int
155                 accesskey string
156                 secretkey string
157         }{
158                 {true, aws.V2Signature, arvadostest.ActiveToken, "none"},
159                 {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"},
160                 {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"},
161                 {false, aws.V2Signature, "none", "none"},
162                 {false, aws.V2Signature, "none", arvadostest.ActiveToken},
163
164                 {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken},
165                 {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken},
166                 {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2)},
167                 {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)},
168                 {false, aws.V4Signature, arvadostest.ActiveToken, ""},
169                 {false, aws.V4Signature, arvadostest.ActiveToken, "none"},
170                 {false, aws.V4Signature, "none", arvadostest.ActiveToken},
171                 {false, aws.V4Signature, "none", "none"},
172         } {
173                 c.Logf("%#v", trial)
174                 bucket.S3.Auth = *(aws.NewAuth(trial.accesskey, trial.secretkey, "", time.Now().Add(time.Hour)))
175                 bucket.S3.Signature = trial.signature
176                 _, err := bucket.GetReader("emptyfile")
177                 if trial.success {
178                         c.Check(err, check.IsNil)
179                 } else {
180                         c.Check(err, check.NotNil)
181                 }
182         }
183 }
184
185 func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
186         stage := s.s3setup(c)
187         defer stage.teardown(c)
188
189         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
190                 c.Logf("bucket %s", bucket.Name)
191                 exists, err := bucket.Exists("")
192                 c.Check(err, check.IsNil)
193                 c.Check(exists, check.Equals, true)
194         }
195 }
196
197 func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
198         stage := s.s3setup(c)
199         defer stage.teardown(c)
200         s.testS3GetObject(c, stage.collbucket, "")
201 }
202 func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
203         stage := s.s3setup(c)
204         defer stage.teardown(c)
205         s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
206 }
207 func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
208         rdr, err := bucket.GetReader(prefix + "emptyfile")
209         c.Assert(err, check.IsNil)
210         buf, err := ioutil.ReadAll(rdr)
211         c.Check(err, check.IsNil)
212         c.Check(len(buf), check.Equals, 0)
213         err = rdr.Close()
214         c.Check(err, check.IsNil)
215
216         // GetObject
217         rdr, err = bucket.GetReader(prefix + "missingfile")
218         c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
219         c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
220         c.Check(err, check.ErrorMatches, `The specified key does not exist.`)
221
222         // HeadObject
223         exists, err := bucket.Exists(prefix + "missingfile")
224         c.Check(err, check.IsNil)
225         c.Check(exists, check.Equals, false)
226
227         // GetObject
228         rdr, err = bucket.GetReader(prefix + "sailboat.txt")
229         c.Assert(err, check.IsNil)
230         buf, err = ioutil.ReadAll(rdr)
231         c.Check(err, check.IsNil)
232         c.Check(buf, check.DeepEquals, []byte("⛵\n"))
233         err = rdr.Close()
234         c.Check(err, check.IsNil)
235
236         // HeadObject
237         resp, err := bucket.Head(prefix+"sailboat.txt", nil)
238         c.Check(err, check.IsNil)
239         c.Check(resp.StatusCode, check.Equals, http.StatusOK)
240         c.Check(resp.ContentLength, check.Equals, int64(4))
241
242         // HeadObject with superfluous leading slashes
243         exists, err = bucket.Exists(prefix + "//sailboat.txt")
244         c.Check(err, check.IsNil)
245         c.Check(exists, check.Equals, true)
246 }
247
248 func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
249         got := map[string]string{}
250         for hk, hv := range hdr {
251                 if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
252                         got[k] = hv[0]
253                 }
254         }
255         c.Check(got, check.DeepEquals, expect)
256 }
257
258 func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
259         stage := s.s3setup(c)
260         defer stage.teardown(c)
261
262         expectCollectionTags := map[string]string{
263                 "String":   "string value",
264                 "Array":    `["element1","element2"]`,
265                 "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
266                 "Nonascii": "=?UTF-8?b?4pu1?=",
267                 "Newline":  mime.BEncoding.Encode("UTF-8", "foo\r\nX-Bad: header"),
268         }
269         expectSubprojectTags := map[string]string{
270                 "Subproject_properties_key": "subproject properties value",
271         }
272         expectProjectTags := map[string]string{
273                 "Project-Properties-Key": "project properties value",
274         }
275
276         c.Log("HEAD object with metadata from collection")
277         resp, err := stage.collbucket.Head("sailboat.txt", nil)
278         c.Assert(err, check.IsNil)
279         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
280
281         c.Log("GET object with metadata from collection")
282         rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
283         c.Assert(err, check.IsNil)
284         content, err := ioutil.ReadAll(rdr)
285         c.Check(err, check.IsNil)
286         rdr.Close()
287         c.Check(content, check.HasLen, 4)
288         s.checkMetaEquals(c, hdr, expectCollectionTags)
289         c.Check(hdr["Inject"], check.IsNil)
290
291         c.Log("HEAD bucket with metadata from collection")
292         resp, err = stage.collbucket.Head("/", nil)
293         c.Assert(err, check.IsNil)
294         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
295
296         c.Log("HEAD directory placeholder with metadata from collection")
297         resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
298         c.Assert(err, check.IsNil)
299         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
300
301         c.Log("HEAD file with metadata from collection")
302         resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
303         c.Assert(err, check.IsNil)
304         s.checkMetaEquals(c, resp.Header, expectCollectionTags)
305
306         c.Log("HEAD directory placeholder with metadata from subproject")
307         resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
308         c.Assert(err, check.IsNil)
309         s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
310
311         c.Log("HEAD bucket with metadata from project")
312         resp, err = stage.projbucket.Head("/", nil)
313         c.Assert(err, check.IsNil)
314         s.checkMetaEquals(c, resp.Header, expectProjectTags)
315 }
316
317 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
318         stage := s.s3setup(c)
319         defer stage.teardown(c)
320         s.testS3PutObjectSuccess(c, stage.collbucket, "", stage.coll.UUID)
321 }
322 func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
323         stage := s.s3setup(c)
324         defer stage.teardown(c)
325         s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/", stage.coll.UUID)
326 }
327 func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string, collUUID string) {
328         // We insert a delay between test cases to ensure we exercise
329         // rollover of expired sessions.
330         sleep := time.Second / 100
331         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(sleep * 3)
332
333         for _, trial := range []struct {
334                 path        string
335                 size        int
336                 contentType string
337         }{
338                 {
339                         path:        "newfile",
340                         size:        128000000,
341                         contentType: "application/octet-stream",
342                 }, {
343                         path:        "newdir/newfile",
344                         size:        1 << 26,
345                         contentType: "application/octet-stream",
346                 }, {
347                         path:        "/aaa",
348                         size:        2,
349                         contentType: "application/octet-stream",
350                 }, {
351                         path:        "//bbb",
352                         size:        2,
353                         contentType: "application/octet-stream",
354                 }, {
355                         path:        "ccc//",
356                         size:        0,
357                         contentType: "application/x-directory",
358                 }, {
359                         path:        "newdir1/newdir2/newfile",
360                         size:        0,
361                         contentType: "application/octet-stream",
362                 }, {
363                         path:        "newdir1/newdir2/newdir3/",
364                         size:        0,
365                         contentType: "application/x-directory",
366                 },
367         } {
368                 time.Sleep(sleep)
369                 c.Logf("=== %v", trial)
370
371                 objname := prefix + trial.path
372
373                 _, err := bucket.GetReader(objname)
374                 if !c.Check(err, check.NotNil) {
375                         continue
376                 }
377                 c.Check(err.(*s3.Error).StatusCode, check.Equals, http.StatusNotFound)
378                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
379                 if !c.Check(err, check.ErrorMatches, `The specified key does not exist.`) {
380                         continue
381                 }
382
383                 buf := make([]byte, trial.size)
384                 rand.Read(buf)
385
386                 err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
387                 c.Check(err, check.IsNil)
388
389                 rdr, err := bucket.GetReader(objname)
390                 if strings.HasSuffix(trial.path, "/") && !s.handler.Cluster.Collections.S3FolderObjects {
391                         c.Check(err, check.NotNil)
392                         continue
393                 } else if !c.Check(err, check.IsNil) {
394                         continue
395                 }
396                 buf2, err := ioutil.ReadAll(rdr)
397                 c.Check(err, check.IsNil)
398                 c.Check(buf2, check.HasLen, len(buf))
399                 c.Check(bytes.Equal(buf, buf2), check.Equals, true)
400
401                 // Check that the change is immediately visible via
402                 // (non-S3) webdav request.
403                 _, resp := s.do("GET", "http://"+collUUID+".keep-web.example/"+trial.path, arvadostest.ActiveTokenV2, nil)
404                 c.Check(resp.Code, check.Equals, http.StatusOK)
405                 if !strings.HasSuffix(trial.path, "/") {
406                         c.Check(resp.Body.Len(), check.Equals, trial.size)
407                 }
408         }
409 }
410
411 func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
412         stage := s.s3setup(c)
413         defer stage.teardown(c)
414         bucket := stage.projbucket
415
416         for _, trial := range []struct {
417                 path        string
418                 size        int
419                 contentType string
420         }{
421                 {
422                         path:        "newfile",
423                         size:        1234,
424                         contentType: "application/octet-stream",
425                 }, {
426                         path:        "newdir/newfile",
427                         size:        1234,
428                         contentType: "application/octet-stream",
429                 }, {
430                         path:        "newdir2/",
431                         size:        0,
432                         contentType: "application/x-directory",
433                 },
434         } {
435                 c.Logf("=== %v", trial)
436
437                 _, err := bucket.GetReader(trial.path)
438                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
439                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
440                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
441
442                 buf := make([]byte, trial.size)
443                 rand.Read(buf)
444
445                 err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
446                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 400)
447                 c.Check(err.(*s3.Error).Code, check.Equals, `InvalidArgument`)
448                 c.Check(err, check.ErrorMatches, `(mkdir "/by_id/zzzzz-j7d0g-[a-z0-9]{15}/newdir2?"|open "/zzzzz-j7d0g-[a-z0-9]{15}/newfile") failed: invalid (argument|operation)`)
449
450                 _, err = bucket.GetReader(trial.path)
451                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
452                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
453                 c.Assert(err, check.ErrorMatches, `The specified key does not exist.`)
454         }
455 }
456
457 func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
458         stage := s.s3setup(c)
459         defer stage.teardown(c)
460         s.testS3DeleteObject(c, stage.collbucket, "")
461 }
462 func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
463         stage := s.s3setup(c)
464         defer stage.teardown(c)
465         s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
466 }
467 func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
468         s.handler.Cluster.Collections.S3FolderObjects = true
469         for _, trial := range []struct {
470                 path string
471         }{
472                 {"/"},
473                 {"nonexistentfile"},
474                 {"emptyfile"},
475                 {"sailboat.txt"},
476                 {"sailboat.txt/"},
477                 {"emptydir"},
478                 {"emptydir/"},
479         } {
480                 objname := prefix + trial.path
481                 comment := check.Commentf("objname %q", objname)
482
483                 err := bucket.Del(objname)
484                 if trial.path == "/" {
485                         c.Check(err, check.NotNil)
486                         continue
487                 }
488                 c.Check(err, check.IsNil, comment)
489                 _, err = bucket.GetReader(objname)
490                 c.Check(err, check.NotNil, comment)
491         }
492 }
493
494 func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
495         stage := s.s3setup(c)
496         defer stage.teardown(c)
497         s.testS3PutObjectFailure(c, stage.collbucket, "")
498 }
499 func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
500         stage := s.s3setup(c)
501         defer stage.teardown(c)
502         s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
503 }
504 func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
505         s.handler.Cluster.Collections.S3FolderObjects = false
506
507         var wg sync.WaitGroup
508         for _, trial := range []struct {
509                 path string
510         }{
511                 {
512                         path: "emptyfile/newname", // emptyfile exists, see s3setup()
513                 }, {
514                         path: "emptyfile/", // emptyfile exists, see s3setup()
515                 }, {
516                         path: "emptydir", // dir already exists, see s3setup()
517                 }, {
518                         path: "emptydir/",
519                 }, {
520                         path: "emptydir//",
521                 }, {
522                         path: "newdir/",
523                 }, {
524                         path: "newdir//",
525                 }, {
526                         path: "/",
527                 }, {
528                         path: "//",
529                 }, {
530                         path: "",
531                 },
532         } {
533                 trial := trial
534                 wg.Add(1)
535                 go func() {
536                         defer wg.Done()
537                         c.Logf("=== %v", trial)
538
539                         objname := prefix + trial.path
540
541                         buf := make([]byte, 1234)
542                         rand.Read(buf)
543
544                         err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
545                         if !c.Check(err, check.ErrorMatches, `(invalid object name.*|open ".*" failed.*|object name conflicts with existing object|Missing object name in PUT request.)`, check.Commentf("PUT %q should fail", objname)) {
546                                 return
547                         }
548
549                         if objname != "" && objname != "/" {
550                                 _, err = bucket.GetReader(objname)
551                                 c.Check(err.(*s3.Error).StatusCode, check.Equals, 404)
552                                 c.Check(err.(*s3.Error).Code, check.Equals, `NoSuchKey`)
553                                 c.Check(err, check.ErrorMatches, `The specified key does not exist.`, check.Commentf("GET %q should return 404", objname))
554                         }
555                 }()
556         }
557         wg.Wait()
558 }
559
560 func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
561         fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
562         c.Assert(err, check.IsNil)
563         for d := 0; d < dirs; d++ {
564                 dir := fmt.Sprintf("dir%d", d)
565                 c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
566                 for i := 0; i < filesPerDir; i++ {
567                         f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
568                         c.Assert(err, check.IsNil)
569                         c.Assert(f.Close(), check.IsNil)
570                 }
571         }
572         c.Assert(fs.Sync(), check.IsNil)
573 }
574
575 func (s *IntegrationSuite) sign(c *check.C, req *http.Request, key, secret string) {
576         scope := "20200202/zzzzz/service/aws4_request"
577         signedHeaders := "date"
578         req.Header.Set("Date", time.Now().UTC().Format(time.RFC1123))
579         stringToSign, err := s3stringToSign(s3SignAlgorithm, scope, signedHeaders, req)
580         c.Assert(err, check.IsNil)
581         sig, err := s3signature(secret, scope, signedHeaders, stringToSign)
582         c.Assert(err, check.IsNil)
583         req.Header.Set("Authorization", s3SignAlgorithm+" Credential="+key+"/"+scope+", SignedHeaders="+signedHeaders+", Signature="+sig)
584 }
585
586 func (s *IntegrationSuite) TestS3VirtualHostStyleRequests(c *check.C) {
587         stage := s.s3setup(c)
588         defer stage.teardown(c)
589         for _, trial := range []struct {
590                 url            string
591                 method         string
592                 body           string
593                 responseCode   int
594                 responseRegexp []string
595         }{
596                 {
597                         url:            "https://" + stage.collbucket.Name + ".example.com/",
598                         method:         "GET",
599                         responseCode:   http.StatusOK,
600                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
601                 },
602                 {
603                         url:            "https://" + strings.Replace(stage.coll.PortableDataHash, "+", "-", -1) + ".example.com/",
604                         method:         "GET",
605                         responseCode:   http.StatusOK,
606                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
607                 },
608                 {
609                         url:            "https://" + stage.projbucket.Name + ".example.com/?prefix=" + stage.coll.Name + "/&delimiter=/",
610                         method:         "GET",
611                         responseCode:   http.StatusOK,
612                         responseRegexp: []string{`(?ms).*sailboat\.txt.*`},
613                 },
614                 {
615                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/sailboat.txt",
616                         method:         "GET",
617                         responseCode:   http.StatusOK,
618                         responseRegexp: []string{`⛵\n`},
619                 },
620                 {
621                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
622                         method:       "PUT",
623                         body:         "boop",
624                         responseCode: http.StatusOK,
625                 },
626                 {
627                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "/beep",
628                         method:         "GET",
629                         responseCode:   http.StatusOK,
630                         responseRegexp: []string{`boop`},
631                 },
632                 {
633                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
634                         method:       "GET",
635                         responseCode: http.StatusNotFound,
636                 },
637                 {
638                         url:          "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
639                         method:       "PUT",
640                         body:         "boop",
641                         responseCode: http.StatusOK,
642                 },
643                 {
644                         url:            "https://" + stage.projbucket.Name + ".example.com/" + stage.coll.Name + "//boop",
645                         method:         "GET",
646                         responseCode:   http.StatusOK,
647                         responseRegexp: []string{`boop`},
648                 },
649         } {
650                 url, err := url.Parse(trial.url)
651                 c.Assert(err, check.IsNil)
652                 req, err := http.NewRequest(trial.method, url.String(), bytes.NewReader([]byte(trial.body)))
653                 c.Assert(err, check.IsNil)
654                 s.sign(c, req, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken)
655                 rr := httptest.NewRecorder()
656                 s.handler.ServeHTTP(rr, req)
657                 resp := rr.Result()
658                 c.Check(resp.StatusCode, check.Equals, trial.responseCode)
659                 body, err := ioutil.ReadAll(resp.Body)
660                 c.Assert(err, check.IsNil)
661                 for _, re := range trial.responseRegexp {
662                         c.Check(string(body), check.Matches, re)
663                 }
664         }
665 }
666
667 func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) {
668         stage := s.s3setup(c)
669         defer stage.teardown(c)
670         for _, trial := range []struct {
671                 rawPath        string
672                 normalizedPath string
673         }{
674                 {"/foo", "/foo"},                           // boring case
675                 {"/foo%5fbar", "/foo_bar"},                 // _ must not be escaped
676                 {"/foo%2fbar", "/foo/bar"},                 // / must not be escaped
677                 {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
678                 {"/foo%5bbar", "/foo%5Bbar"},               // %XX must be uppercase
679                 {"//foo///.bar", "/foo/.bar"},              // "//" and "///" must be squashed to "/"
680         } {
681                 c.Logf("trial %q", trial)
682
683                 date := time.Now().UTC().Format("20060102T150405Z")
684                 scope := "20200202/zzzzz/S3/aws4_request"
685                 canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
686                 c.Logf("canonicalRequest %q", canonicalRequest)
687                 expect := fmt.Sprintf("%s\n%s\n%s\n%s", s3SignAlgorithm, date, scope, hashdigest(sha256.New(), canonicalRequest))
688                 c.Logf("expected stringToSign %q", expect)
689
690                 req, err := http.NewRequest("GET", "https://host.example.com"+trial.rawPath, nil)
691                 req.Header.Set("X-Amz-Date", date)
692                 req.Host = "host.example.com"
693                 c.Assert(err, check.IsNil)
694
695                 obtained, err := s3stringToSign(s3SignAlgorithm, scope, "host", req)
696                 if !c.Check(err, check.IsNil) {
697                         continue
698                 }
699                 c.Check(obtained, check.Equals, expect)
700         }
701 }
702
703 func (s *IntegrationSuite) TestS3GetBucketLocation(c *check.C) {
704         stage := s.s3setup(c)
705         defer stage.teardown(c)
706         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
707                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
708                 c.Check(err, check.IsNil)
709                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
710                 req.URL.RawQuery = "location"
711                 resp, err := http.DefaultClient.Do(req)
712                 c.Assert(err, check.IsNil)
713                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
714                 buf, err := ioutil.ReadAll(resp.Body)
715                 c.Assert(err, check.IsNil)
716                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<LocationConstraint><LocationConstraint xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">zzzzz</LocationConstraint></LocationConstraint>\n")
717         }
718 }
719
720 func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
721         stage := s.s3setup(c)
722         defer stage.teardown(c)
723         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
724                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
725                 c.Check(err, check.IsNil)
726                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
727                 req.URL.RawQuery = "versioning"
728                 resp, err := http.DefaultClient.Do(req)
729                 c.Assert(err, check.IsNil)
730                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
731                 buf, err := ioutil.ReadAll(resp.Body)
732                 c.Assert(err, check.IsNil)
733                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
734         }
735 }
736
737 func (s *IntegrationSuite) TestS3UnsupportedAPIs(c *check.C) {
738         stage := s.s3setup(c)
739         defer stage.teardown(c)
740         for _, trial := range []struct {
741                 method   string
742                 path     string
743                 rawquery string
744         }{
745                 {"GET", "/", "acl&versionId=1234"},    // GetBucketAcl
746                 {"GET", "/foo", "acl&versionId=1234"}, // GetObjectAcl
747                 {"PUT", "/", "acl"},                   // PutBucketAcl
748                 {"PUT", "/foo", "acl"},                // PutObjectAcl
749                 {"DELETE", "/", "tagging"},            // DeleteBucketTagging
750                 {"DELETE", "/foo", "tagging"},         // DeleteObjectTagging
751         } {
752                 for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
753                         c.Logf("trial %v bucket %v", trial, bucket)
754                         req, err := http.NewRequest(trial.method, bucket.URL(trial.path), nil)
755                         c.Check(err, check.IsNil)
756                         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
757                         req.URL.RawQuery = trial.rawquery
758                         resp, err := http.DefaultClient.Do(req)
759                         c.Assert(err, check.IsNil)
760                         c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
761                         buf, err := ioutil.ReadAll(resp.Body)
762                         c.Assert(err, check.IsNil)
763                         c.Check(string(buf), check.Matches, "(?ms).*InvalidRequest.*API not supported.*")
764                 }
765         }
766 }
767
768 // If there are no CommonPrefixes entries, the CommonPrefixes XML tag
769 // should not appear at all.
770 func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) {
771         stage := s.s3setup(c)
772         defer stage.teardown(c)
773
774         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
775         c.Assert(err, check.IsNil)
776         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
777         req.URL.RawQuery = "prefix=asdfasdfasdf&delimiter=/"
778         resp, err := http.DefaultClient.Do(req)
779         c.Assert(err, check.IsNil)
780         buf, err := ioutil.ReadAll(resp.Body)
781         c.Assert(err, check.IsNil)
782         c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`)
783 }
784
785 // If there is no delimiter in the request, or the results are not
786 // truncated, the NextMarker XML tag should not appear in the response
787 // body.
788 func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) {
789         stage := s.s3setup(c)
790         defer stage.teardown(c)
791
792         for _, query := range []string{"prefix=e&delimiter=/", ""} {
793                 req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
794                 c.Assert(err, check.IsNil)
795                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
796                 req.URL.RawQuery = query
797                 resp, err := http.DefaultClient.Do(req)
798                 c.Assert(err, check.IsNil)
799                 buf, err := ioutil.ReadAll(resp.Body)
800                 c.Assert(err, check.IsNil)
801                 c.Check(string(buf), check.Not(check.Matches), `(?ms).*NextMarker.*`)
802         }
803 }
804
805 // List response should include KeyCount field.
806 func (s *IntegrationSuite) TestS3ListKeyCount(c *check.C) {
807         stage := s.s3setup(c)
808         defer stage.teardown(c)
809
810         req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
811         c.Assert(err, check.IsNil)
812         req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
813         req.URL.RawQuery = "prefix=&delimiter=/"
814         resp, err := http.DefaultClient.Do(req)
815         c.Assert(err, check.IsNil)
816         buf, err := ioutil.ReadAll(resp.Body)
817         c.Assert(err, check.IsNil)
818         c.Check(string(buf), check.Matches, `(?ms).*<KeyCount>2</KeyCount>.*`)
819 }
820
821 func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
822         stage := s.s3setup(c)
823         defer stage.teardown(c)
824
825         var markers int
826         for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
827                 dirs := 2000
828                 filesPerDir := 2
829                 stage.writeBigDirs(c, dirs, filesPerDir)
830                 // Total # objects is:
831                 //                 2 file entries from s3setup (emptyfile and sailboat.txt)
832                 //                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
833                 //             +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled)
834                 // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
835                 s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
836                 s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
837                 s.testS3List(c, stage.collbucket, "", 51, markers+2+(filesPerDir+markers)*dirs)
838                 s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
839         }
840 }
841 func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
842         c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.handler.Cluster.Collections.S3FolderObjects)
843         expectPageSize := pageSize
844         if expectPageSize > 1000 {
845                 expectPageSize = 1000
846         }
847         gotKeys := map[string]s3.Key{}
848         nextMarker := ""
849         pages := 0
850         for {
851                 resp, err := bucket.List(prefix, "", nextMarker, pageSize)
852                 if !c.Check(err, check.IsNil) {
853                         break
854                 }
855                 c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
856                 if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
857                         break
858                 }
859                 for _, key := range resp.Contents {
860                         if _, dup := gotKeys[key.Key]; dup {
861                                 c.Errorf("got duplicate key %q on page %d", key.Key, pages)
862                         }
863                         gotKeys[key.Key] = key
864                         if strings.Contains(key.Key, "sailboat.txt") {
865                                 c.Check(key.Size, check.Equals, int64(4))
866                         }
867                 }
868                 if !resp.IsTruncated {
869                         c.Check(resp.NextMarker, check.Equals, "")
870                         break
871                 }
872                 if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
873                         break
874                 }
875                 nextMarker = resp.NextMarker
876         }
877         if !c.Check(len(gotKeys), check.Equals, expectFiles) {
878                 var sorted []string
879                 for k := range gotKeys {
880                         sorted = append(sorted, k)
881                 }
882                 sort.Strings(sorted)
883                 for _, k := range sorted {
884                         c.Logf("got %s", k)
885                 }
886         }
887 }
888
889 func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
890         for _, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
891                 s.testS3CollectionListRollup(c)
892         }
893 }
894
895 func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
896         stage := s.s3setup(c)
897         defer stage.teardown(c)
898
899         dirs := 2
900         filesPerDir := 500
901         stage.writeBigDirs(c, dirs, filesPerDir)
902         err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
903         c.Assert(err, check.IsNil)
904         var allfiles []string
905         for marker := ""; ; {
906                 resp, err := stage.collbucket.List("", "", marker, 20000)
907                 c.Check(err, check.IsNil)
908                 for _, key := range resp.Contents {
909                         if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
910                                 allfiles = append(allfiles, key.Key)
911                         }
912                 }
913                 marker = resp.NextMarker
914                 if marker == "" {
915                         break
916                 }
917         }
918         markers := 0
919         if s.handler.Cluster.Collections.S3FolderObjects {
920                 markers = 1
921         }
922         c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
923
924         gotDirMarker := map[string]bool{}
925         for _, name := range allfiles {
926                 isDirMarker := strings.HasSuffix(name, "/")
927                 if markers == 0 {
928                         c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
929                 } else if isDirMarker {
930                         gotDirMarker[name] = true
931                 } else if i := strings.LastIndex(name, "/"); i >= 0 {
932                         c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
933                         gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
934                 }
935         }
936
937         for _, trial := range []struct {
938                 prefix    string
939                 delimiter string
940                 marker    string
941         }{
942                 {"", "", ""},
943                 {"di", "/", ""},
944                 {"di", "r", ""},
945                 {"di", "n", ""},
946                 {"dir0", "/", ""},
947                 {"dir0/", "/", ""},
948                 {"dir0/f", "/", ""},
949                 {"dir0", "", ""},
950                 {"dir0/", "", ""},
951                 {"dir0/f", "", ""},
952                 {"dir0", "/", "dir0/file14.txt"},       // one commonprefix, "dir0/"
953                 {"dir0", "/", "dir0/zzzzfile.txt"},     // no commonprefixes
954                 {"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
955                 {"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
956                 {"", "", "dir1/file498.txt"},           // last page of results
957                 {"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
958                 {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
959                 {"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
960                 {"dir2", "/", ""},                      // prefix "dir2" does not exist
961                 {"", "/", ""},
962         } {
963                 c.Logf("\n\n=== trial %+v markers=%d", trial, markers)
964
965                 maxKeys := 20
966                 resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
967                 c.Check(err, check.IsNil)
968                 if resp.IsTruncated && trial.delimiter == "" {
969                         // goamz List method fills in the missing
970                         // NextMarker field if resp.IsTruncated, so
971                         // now we can't really tell whether it was
972                         // sent by the server or by goamz. In cases
973                         // where it should be empty but isn't, assume
974                         // it's goamz's fault.
975                         resp.NextMarker = ""
976                 }
977
978                 var expectKeys []string
979                 var expectPrefixes []string
980                 var expectNextMarker string
981                 var expectTruncated bool
982                 for _, key := range allfiles {
983                         full := len(expectKeys)+len(expectPrefixes) >= maxKeys
984                         if !strings.HasPrefix(key, trial.prefix) || key <= trial.marker {
985                                 continue
986                         } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
987                                 prefix := key[:len(trial.prefix)+idx+1]
988                                 if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
989                                         // same prefix as previous key
990                                 } else if full {
991                                         expectTruncated = true
992                                 } else {
993                                         expectPrefixes = append(expectPrefixes, prefix)
994                                         expectNextMarker = prefix
995                                 }
996                         } else if full {
997                                 expectTruncated = true
998                                 break
999                         } else {
1000                                 expectKeys = append(expectKeys, key)
1001                                 if trial.delimiter != "" {
1002                                         expectNextMarker = key
1003                                 }
1004                         }
1005                 }
1006                 if !expectTruncated {
1007                         expectNextMarker = ""
1008                 }
1009
1010                 var gotKeys []string
1011                 for _, key := range resp.Contents {
1012                         gotKeys = append(gotKeys, key.Key)
1013                 }
1014                 var gotPrefixes []string
1015                 for _, prefix := range resp.CommonPrefixes {
1016                         gotPrefixes = append(gotPrefixes, prefix)
1017                 }
1018                 commentf := check.Commentf("trial %+v markers=%d", trial, markers)
1019                 c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
1020                 c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
1021                 c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
1022                 c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
1023                 c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
1024         }
1025 }
1026
1027 func (s *IntegrationSuite) TestS3ListObjectsV2ManySubprojects(c *check.C) {
1028         stage := s.s3setup(c)
1029         defer stage.teardown(c)
1030         projects := 50
1031         collectionsPerProject := 2
1032         for i := 0; i < projects; i++ {
1033                 var subproj arvados.Group
1034                 err := stage.arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
1035                         "group": map[string]interface{}{
1036                                 "owner_uuid":  stage.subproj.UUID,
1037                                 "group_class": "project",
1038                                 "name":        fmt.Sprintf("keep-web s3 test subproject %d", i),
1039                         },
1040                 })
1041                 c.Assert(err, check.IsNil)
1042                 for j := 0; j < collectionsPerProject; j++ {
1043                         err = stage.arv.RequestAndDecode(nil, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
1044                                 "owner_uuid":    subproj.UUID,
1045                                 "name":          fmt.Sprintf("keep-web s3 test collection %d", j),
1046                                 "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
1047                         }})
1048                         c.Assert(err, check.IsNil)
1049                 }
1050         }
1051         c.Logf("setup complete")
1052
1053         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1054                 Region:           aws_aws.String("auto"),
1055                 Endpoint:         aws_aws.String(s.testServer.URL),
1056                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1057                 S3ForcePathStyle: aws_aws.Bool(true),
1058         }))
1059         client := aws_s3.New(sess)
1060         ctx := context.Background()
1061         params := aws_s3.ListObjectsV2Input{
1062                 Bucket:    aws_aws.String(stage.proj.UUID),
1063                 Delimiter: aws_aws.String("/"),
1064                 Prefix:    aws_aws.String("keep-web s3 test subproject/"),
1065                 MaxKeys:   aws_aws.Int64(int64(projects / 2)),
1066         }
1067         for page := 1; ; page++ {
1068                 t0 := time.Now()
1069                 result, err := client.ListObjectsV2WithContext(ctx, &params)
1070                 if !c.Check(err, check.IsNil) {
1071                         break
1072                 }
1073                 c.Logf("got page %d in %v with len(Contents) == %d, len(CommonPrefixes) == %d", page, time.Since(t0), len(result.Contents), len(result.CommonPrefixes))
1074                 if !*result.IsTruncated {
1075                         break
1076                 }
1077                 params.ContinuationToken = result.NextContinuationToken
1078                 *params.MaxKeys = *params.MaxKeys/2 + 1
1079         }
1080 }
1081
1082 func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) {
1083         stage := s.s3setup(c)
1084         defer stage.teardown(c)
1085         dirs := 2
1086         filesPerDir := 40
1087         stage.writeBigDirs(c, dirs, filesPerDir)
1088
1089         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1090                 Region:           aws_aws.String("auto"),
1091                 Endpoint:         aws_aws.String(s.testServer.URL),
1092                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1093                 S3ForcePathStyle: aws_aws.Bool(true),
1094         }))
1095
1096         stringOrNil := func(s string) *string {
1097                 if s == "" {
1098                         return nil
1099                 } else {
1100                         return &s
1101                 }
1102         }
1103
1104         client := aws_s3.New(sess)
1105         ctx := context.Background()
1106
1107         for _, trial := range []struct {
1108                 prefix               string
1109                 delimiter            string
1110                 startAfter           string
1111                 maxKeys              int
1112                 expectKeys           int
1113                 expectCommonPrefixes map[string]bool
1114         }{
1115                 {
1116                         // Expect {filesPerDir plus the dir itself}
1117                         // for each dir, plus emptydir, emptyfile, and
1118                         // sailboat.txt.
1119                         expectKeys: (filesPerDir+1)*dirs + 3,
1120                 },
1121                 {
1122                         maxKeys:    15,
1123                         expectKeys: (filesPerDir+1)*dirs + 3,
1124                 },
1125                 {
1126                         startAfter: "dir0/z",
1127                         maxKeys:    15,
1128                         // Expect {filesPerDir plus the dir itself}
1129                         // for each dir except dir0, plus emptydir,
1130                         // emptyfile, and sailboat.txt.
1131                         expectKeys: (filesPerDir+1)*(dirs-1) + 3,
1132                 },
1133                 {
1134                         maxKeys:              1,
1135                         delimiter:            "/",
1136                         expectKeys:           2, // emptyfile, sailboat.txt
1137                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1138                 },
1139                 {
1140                         startAfter:           "dir0/z",
1141                         maxKeys:              15,
1142                         delimiter:            "/",
1143                         expectKeys:           2, // emptyfile, sailboat.txt
1144                         expectCommonPrefixes: map[string]bool{"dir1/": true, "emptydir/": true},
1145                 },
1146                 {
1147                         startAfter:           "dir0/file10.txt",
1148                         maxKeys:              15,
1149                         delimiter:            "/",
1150                         expectKeys:           2,
1151                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true, "emptydir/": true},
1152                 },
1153                 {
1154                         startAfter:           "dir0/file10.txt",
1155                         maxKeys:              15,
1156                         prefix:               "d",
1157                         delimiter:            "/",
1158                         expectKeys:           0,
1159                         expectCommonPrefixes: map[string]bool{"dir0/": true, "dir1/": true},
1160                 },
1161         } {
1162                 c.Logf("[trial %+v]", trial)
1163                 params := aws_s3.ListObjectsV2Input{
1164                         Bucket:     aws_aws.String(stage.collbucket.Name),
1165                         Prefix:     stringOrNil(trial.prefix),
1166                         Delimiter:  stringOrNil(trial.delimiter),
1167                         StartAfter: stringOrNil(trial.startAfter),
1168                         MaxKeys:    aws_aws.Int64(int64(trial.maxKeys)),
1169                 }
1170                 keySeen := map[string]bool{}
1171                 prefixSeen := map[string]bool{}
1172                 for {
1173                         result, err := client.ListObjectsV2WithContext(ctx, &params)
1174                         if !c.Check(err, check.IsNil) {
1175                                 break
1176                         }
1177                         c.Check(result.Name, check.DeepEquals, aws_aws.String(stage.collbucket.Name))
1178                         c.Check(result.Prefix, check.DeepEquals, aws_aws.String(trial.prefix))
1179                         c.Check(result.Delimiter, check.DeepEquals, aws_aws.String(trial.delimiter))
1180                         // The following two fields are expected to be
1181                         // nil (i.e., no tag in XML response) rather
1182                         // than "" when the corresponding request
1183                         // field was empty or nil.
1184                         c.Check(result.StartAfter, check.DeepEquals, stringOrNil(trial.startAfter))
1185                         c.Check(result.ContinuationToken, check.DeepEquals, params.ContinuationToken)
1186
1187                         if trial.maxKeys > 0 {
1188                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(trial.maxKeys)))
1189                                 c.Check(len(result.Contents)+len(result.CommonPrefixes) <= trial.maxKeys, check.Equals, true)
1190                         } else {
1191                                 c.Check(result.MaxKeys, check.DeepEquals, aws_aws.Int64(int64(s3MaxKeys)))
1192                         }
1193
1194                         for _, ent := range result.Contents {
1195                                 c.Assert(ent.Key, check.NotNil)
1196                                 c.Check(*ent.Key > trial.startAfter, check.Equals, true)
1197                                 c.Check(keySeen[*ent.Key], check.Equals, false, check.Commentf("dup key %q", *ent.Key))
1198                                 keySeen[*ent.Key] = true
1199                         }
1200                         for _, ent := range result.CommonPrefixes {
1201                                 c.Assert(ent.Prefix, check.NotNil)
1202                                 c.Check(strings.HasSuffix(*ent.Prefix, trial.delimiter), check.Equals, true, check.Commentf("bad CommonPrefix %q", *ent.Prefix))
1203                                 if strings.HasPrefix(trial.startAfter, *ent.Prefix) {
1204                                         // If we asked for
1205                                         // startAfter=dir0/file10.txt,
1206                                         // we expect dir0/ to be
1207                                         // returned as a common prefix
1208                                 } else {
1209                                         c.Check(*ent.Prefix > trial.startAfter, check.Equals, true)
1210                                 }
1211                                 c.Check(prefixSeen[*ent.Prefix], check.Equals, false, check.Commentf("dup common prefix %q", *ent.Prefix))
1212                                 prefixSeen[*ent.Prefix] = true
1213                         }
1214                         if *result.IsTruncated && c.Check(result.NextContinuationToken, check.Not(check.Equals), "") {
1215                                 params.ContinuationToken = aws_aws.String(*result.NextContinuationToken)
1216                         } else {
1217                                 break
1218                         }
1219                 }
1220                 c.Check(keySeen, check.HasLen, trial.expectKeys)
1221                 c.Check(prefixSeen, check.HasLen, len(trial.expectCommonPrefixes))
1222                 if len(trial.expectCommonPrefixes) > 0 {
1223                         c.Check(prefixSeen, check.DeepEquals, trial.expectCommonPrefixes)
1224                 }
1225         }
1226 }
1227
1228 func (s *IntegrationSuite) TestS3ListObjectsV2EncodingTypeURL(c *check.C) {
1229         stage := s.s3setup(c)
1230         defer stage.teardown(c)
1231         dirs := 2
1232         filesPerDir := 40
1233         stage.writeBigDirs(c, dirs, filesPerDir)
1234
1235         sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
1236                 Region:           aws_aws.String("auto"),
1237                 Endpoint:         aws_aws.String(s.testServer.URL),
1238                 Credentials:      aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
1239                 S3ForcePathStyle: aws_aws.Bool(true),
1240         }))
1241
1242         client := aws_s3.New(sess)
1243         ctx := context.Background()
1244
1245         result, err := client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1246                 Bucket:       aws_aws.String(stage.collbucket.Name),
1247                 Prefix:       aws_aws.String("dir0/"),
1248                 Delimiter:    aws_aws.String("/"),
1249                 StartAfter:   aws_aws.String("dir0/"),
1250                 EncodingType: aws_aws.String("url"),
1251         })
1252         c.Assert(err, check.IsNil)
1253         c.Check(*result.Prefix, check.Equals, "dir0%2F")
1254         c.Check(*result.Delimiter, check.Equals, "%2F")
1255         c.Check(*result.StartAfter, check.Equals, "dir0%2F")
1256         for _, ent := range result.Contents {
1257                 c.Check(*ent.Key, check.Matches, "dir0%2F.*")
1258         }
1259         result, err = client.ListObjectsV2WithContext(ctx, &aws_s3.ListObjectsV2Input{
1260                 Bucket:       aws_aws.String(stage.collbucket.Name),
1261                 Delimiter:    aws_aws.String("/"),
1262                 EncodingType: aws_aws.String("url"),
1263         })
1264         c.Assert(err, check.IsNil)
1265         c.Check(*result.Delimiter, check.Equals, "%2F")
1266         c.Check(result.CommonPrefixes, check.HasLen, dirs+1)
1267         for _, ent := range result.CommonPrefixes {
1268                 c.Check(*ent.Prefix, check.Matches, ".*%2F")
1269         }
1270 }
1271
1272 // TestS3cmd checks compatibility with the s3cmd command line tool, if
1273 // it's installed. As of Debian buster, s3cmd is only in backports, so
1274 // `arvados-server install` don't install it, and this test skips if
1275 // it's not installed.
1276 func (s *IntegrationSuite) TestS3cmd(c *check.C) {
1277         if _, err := exec.LookPath("s3cmd"); err != nil {
1278                 c.Skip("s3cmd not found")
1279                 return
1280         }
1281
1282         stage := s.s3setup(c)
1283         defer stage.teardown(c)
1284
1285         cmd := exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "ls", "s3://"+arvadostest.FooCollection)
1286         buf, err := cmd.CombinedOutput()
1287         c.Check(err, check.IsNil)
1288         c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
1289
1290         // This tests whether s3cmd's path normalization agrees with
1291         // keep-web's signature verification wrt chars like "|"
1292         // (neither reserved nor unreserved) and "," (not normally
1293         // percent-encoded in a path).
1294         tmpfile := c.MkDir() + "/dstfile"
1295         cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.URL[7:], "--host-bucket="+s.testServer.URL[7:], "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar", tmpfile)
1296         buf, err = cmd.CombinedOutput()
1297         c.Check(err, check.NotNil)
1298         // As of commit b7520e5c25e1bf25c1a8bf5aa2eadb299be8f606
1299         // (between debian bullseye and bookworm versions), s3cmd
1300         // started catching the NoSuchKey error code and replacing it
1301         // with "Source object '%s' does not exist.".
1302         c.Check(string(buf), check.Matches, `(?ms).*(NoSuchKey|Source object.*does not exist).*\n`)
1303 }
1304
1305 func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
1306         stage := s.s3setup(c)
1307         defer stage.teardown(c)
1308
1309         hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt")
1310         c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
1311         c.Check(body, check.Equals, "⛵\n")
1312 }