Merge branch '16535-s3'
[arvados.git] / services / keep-web / s3_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "bytes"
9         "crypto/rand"
10         "fmt"
11         "io/ioutil"
12         "net/http"
13         "os"
14         "strings"
15         "sync"
16         "time"
17
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
20         "git.arvados.org/arvados.git/sdk/go/arvadostest"
21         "git.arvados.org/arvados.git/sdk/go/keepclient"
22         "github.com/AdRoll/goamz/aws"
23         "github.com/AdRoll/goamz/s3"
24         check "gopkg.in/check.v1"
25 )
26
// s3stage holds the fixtures shared by the S3 tests: API clients, a
// freshly created project and collection, and goamz buckets addressing
// keep-web's S3 endpoint for each of them.
type s3stage struct {
	arv        *arvados.Client
	ac         *arvadosclient.ArvadosClient
	kc         *keepclient.KeepClient
	proj       arvados.Group      // throwaway project owning coll
	projbucket *s3.Bucket         // bucket named after proj's UUID
	coll       arvados.Collection // test collection inside proj
	collbucket *s3.Bucket         // bucket named after coll's UUID
}
36
// s3setup creates the fixtures used by every S3 test in this file: a
// new project, and inside it a collection containing an empty file
// ("emptyfile"), an empty directory ("emptydir"), and a small UTF-8
// file ("sailboat.txt"). It returns the clients plus two goamz buckets
// pointed at keep-web's S3 endpoint: one named after the collection
// UUID and one named after the project UUID.
func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
	var proj arvados.Group
	var coll arvados.Collection
	arv := arvados.NewClientFromEnv()
	arv.AuthToken = arvadostest.ActiveToken
	// Create a throwaway project to own the test collection.
	err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
		"group": map[string]interface{}{
			"group_class": "project",
			"name":        "keep-web s3 test",
		},
		"ensure_unique_name": true,
	})
	c.Assert(err, check.IsNil)
	// The manifest pre-populates "emptyfile" and the directory
	// "emptydir" (d41d8cd9... is the md5 of zero bytes).
	err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
		"owner_uuid":    proj.UUID,
		"name":          "keep-web s3 test collection",
		"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
	}})
	c.Assert(err, check.IsNil)
	ac, err := arvadosclient.New(arv)
	c.Assert(err, check.IsNil)
	kc, err := keepclient.MakeKeepClient(ac)
	c.Assert(err, check.IsNil)
	// Write sailboat.txt through the collection filesystem so the
	// tests have a file with non-empty, multi-byte UTF-8 content.
	fs, err := coll.FileSystem(arv, kc)
	c.Assert(err, check.IsNil)
	f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
	c.Assert(err, check.IsNil)
	_, err = f.Write([]byte("⛵\n"))
	c.Assert(err, check.IsNil)
	err = f.Close()
	c.Assert(err, check.IsNil)
	err = fs.Sync()
	c.Assert(err, check.IsNil)
	// Re-fetch so coll reflects the synced manifest.
	err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
	c.Assert(err, check.IsNil)

	// The same Arvados token is supplied as both the S3 access key
	// and the secret key.
	auth := aws.NewAuth(arvadostest.ActiveTokenV2, arvadostest.ActiveTokenV2, "", time.Now().Add(time.Hour))
	region := aws.Region{
		Name:       s.testServer.Addr,
		S3Endpoint: "http://" + s.testServer.Addr,
	}
	client := s3.New(*auth, region)
	return s3stage{
		arv:  arv,
		ac:   ac,
		kc:   kc,
		proj: proj,
		projbucket: &s3.Bucket{
			S3:   client,
			Name: proj.UUID,
		},
		coll: coll,
		collbucket: &s3.Bucket{
			S3:   client,
			Name: coll.UUID,
		},
	}
}
95
96 func (stage s3stage) teardown(c *check.C) {
97         if stage.coll.UUID != "" {
98                 err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
99                 c.Check(err, check.IsNil)
100         }
101         if stage.proj.UUID != "" {
102                 err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
103                 c.Check(err, check.IsNil)
104         }
105 }
106
107 func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
108         stage := s.s3setup(c)
109         defer stage.teardown(c)
110
111         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
112                 c.Logf("bucket %s", bucket.Name)
113                 exists, err := bucket.Exists("")
114                 c.Check(err, check.IsNil)
115                 c.Check(exists, check.Equals, true)
116         }
117 }
118
// TestS3CollectionGetObject runs the GET/HEAD object checks against a
// collection bucket (objects live at the bucket's top level).
func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3GetObject(c, stage.collbucket, "")
}
// TestS3ProjectGetObject runs the GET/HEAD object checks against a
// project bucket, where objects are nested under "{collection name}/".
func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
}
129 func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
130         rdr, err := bucket.GetReader(prefix + "emptyfile")
131         c.Assert(err, check.IsNil)
132         buf, err := ioutil.ReadAll(rdr)
133         c.Check(err, check.IsNil)
134         c.Check(len(buf), check.Equals, 0)
135         err = rdr.Close()
136         c.Check(err, check.IsNil)
137
138         // GetObject
139         rdr, err = bucket.GetReader(prefix + "missingfile")
140         c.Check(err, check.ErrorMatches, `404 Not Found`)
141
142         // HeadObject
143         exists, err := bucket.Exists(prefix + "missingfile")
144         c.Check(err, check.IsNil)
145         c.Check(exists, check.Equals, false)
146
147         // GetObject
148         rdr, err = bucket.GetReader(prefix + "sailboat.txt")
149         c.Assert(err, check.IsNil)
150         buf, err = ioutil.ReadAll(rdr)
151         c.Check(err, check.IsNil)
152         c.Check(buf, check.DeepEquals, []byte("⛵\n"))
153         err = rdr.Close()
154         c.Check(err, check.IsNil)
155
156         // HeadObject
157         exists, err = bucket.Exists(prefix + "sailboat.txt")
158         c.Check(err, check.IsNil)
159         c.Check(exists, check.Equals, true)
160 }
161
// TestS3CollectionPutObjectSuccess runs the successful-PUT checks
// against a collection bucket.
func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectSuccess(c, stage.collbucket, "")
}
// TestS3ProjectPutObjectSuccess runs the successful-PUT checks against
// a project bucket, writing under "{collection name}/".
func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/")
}
172 func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) {
173         for _, trial := range []struct {
174                 path        string
175                 size        int
176                 contentType string
177         }{
178                 {
179                         path:        "newfile",
180                         size:        128000000,
181                         contentType: "application/octet-stream",
182                 }, {
183                         path:        "newdir/newfile",
184                         size:        1 << 26,
185                         contentType: "application/octet-stream",
186                 }, {
187                         path:        "newdir1/newdir2/newfile",
188                         size:        0,
189                         contentType: "application/octet-stream",
190                 }, {
191                         path:        "newdir1/newdir2/newdir3/",
192                         size:        0,
193                         contentType: "application/x-directory",
194                 },
195         } {
196                 c.Logf("=== %v", trial)
197
198                 objname := prefix + trial.path
199
200                 _, err := bucket.GetReader(objname)
201                 c.Assert(err, check.ErrorMatches, `404 Not Found`)
202
203                 buf := make([]byte, trial.size)
204                 rand.Read(buf)
205
206                 err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
207                 c.Check(err, check.IsNil)
208
209                 rdr, err := bucket.GetReader(objname)
210                 if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects {
211                         c.Check(err, check.NotNil)
212                         continue
213                 } else if !c.Check(err, check.IsNil) {
214                         continue
215                 }
216                 buf2, err := ioutil.ReadAll(rdr)
217                 c.Check(err, check.IsNil)
218                 c.Check(buf2, check.HasLen, len(buf))
219                 c.Check(bytes.Equal(buf, buf2), check.Equals, true)
220         }
221 }
222
223 func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
224         stage := s.s3setup(c)
225         defer stage.teardown(c)
226         bucket := stage.projbucket
227
228         for _, trial := range []struct {
229                 path        string
230                 size        int
231                 contentType string
232         }{
233                 {
234                         path:        "newfile",
235                         size:        1234,
236                         contentType: "application/octet-stream",
237                 }, {
238                         path:        "newdir/newfile",
239                         size:        1234,
240                         contentType: "application/octet-stream",
241                 }, {
242                         path:        "newdir2/",
243                         size:        0,
244                         contentType: "application/x-directory",
245                 },
246         } {
247                 c.Logf("=== %v", trial)
248
249                 _, err := bucket.GetReader(trial.path)
250                 c.Assert(err, check.ErrorMatches, `404 Not Found`)
251
252                 buf := make([]byte, trial.size)
253                 rand.Read(buf)
254
255                 err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
256                 c.Check(err, check.ErrorMatches, `400 Bad Request`)
257
258                 _, err = bucket.GetReader(trial.path)
259                 c.Assert(err, check.ErrorMatches, `404 Not Found`)
260         }
261 }
262
// TestS3CollectionDeleteObject runs the DELETE checks against a
// collection bucket.
func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3DeleteObject(c, stage.collbucket, "")
}
// TestS3ProjectDeleteObject runs the DELETE checks against a project
// bucket, deleting under "{collection name}/".
func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
}
273 func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
274         s.testServer.Config.cluster.Collections.S3FolderObjects = true
275         for _, trial := range []struct {
276                 path string
277         }{
278                 {"/"},
279                 {"nonexistentfile"},
280                 {"emptyfile"},
281                 {"sailboat.txt"},
282                 {"sailboat.txt/"},
283                 {"emptydir"},
284                 {"emptydir/"},
285         } {
286                 objname := prefix + trial.path
287                 comment := check.Commentf("objname %q", objname)
288
289                 err := bucket.Del(objname)
290                 if trial.path == "/" {
291                         c.Check(err, check.NotNil)
292                         continue
293                 }
294                 c.Check(err, check.IsNil, comment)
295                 _, err = bucket.GetReader(objname)
296                 c.Check(err, check.NotNil, comment)
297         }
298 }
299
// TestS3CollectionPutObjectFailure runs the invalid-PUT checks against
// a collection bucket.
func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectFailure(c, stage.collbucket, "")
}
// TestS3ProjectPutObjectFailure runs the invalid-PUT checks against a
// project bucket, writing under "{collection name}/".
func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
}
// testS3PutObjectFailure sends PUT requests for a variety of invalid
// object names (names colliding with existing files, directory-style
// names while S3FolderObjects is disabled, empty and degenerate
// paths) and checks each is rejected with 400 and leaves no object
// behind. The PUTs run concurrently in goroutines.
func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
	s.testServer.Config.cluster.Collections.S3FolderObjects = false
	var wg sync.WaitGroup
	for _, trial := range []struct {
		path string
	}{
		{
			path: "emptyfile/newname", // emptyfile exists, see s3setup()
		}, {
			path: "emptyfile/", // emptyfile exists, see s3setup()
		}, {
			path: "emptydir", // dir already exists, see s3setup()
		}, {
			path: "emptydir/",
		}, {
			path: "emptydir//",
		}, {
			path: "newdir/",
		}, {
			path: "newdir//",
		}, {
			path: "/",
		}, {
			path: "//",
		}, {
			path: "foo//bar",
		}, {
			path: "",
		},
	} {
		trial := trial // capture loop variable for the goroutine
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.Logf("=== %v", trial)

			objname := prefix + trial.path

			buf := make([]byte, 1234)
			rand.Read(buf)

			err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
			if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) {
				return
			}

			// A failed PUT must not leave a readable object
			// behind. ("" and "/" address the bucket root, so
			// skip the GET for those.)
			if objname != "" && objname != "/" {
				_, err = bucket.GetReader(objname)
				c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname))
			}
		}()
	}
	wg.Wait()
}
364
365 func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
366         fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
367         c.Assert(err, check.IsNil)
368         for d := 0; d < dirs; d++ {
369                 dir := fmt.Sprintf("dir%d", d)
370                 c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
371                 for i := 0; i < filesPerDir; i++ {
372                         f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
373                         c.Assert(err, check.IsNil)
374                         c.Assert(f.Close(), check.IsNil)
375                 }
376         }
377         c.Assert(fs.Sync(), check.IsNil)
378 }
379
380 func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
381         stage := s.s3setup(c)
382         defer stage.teardown(c)
383         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
384                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
385                 c.Check(err, check.IsNil)
386                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
387                 req.URL.RawQuery = "versioning"
388                 resp, err := http.DefaultClient.Do(req)
389                 c.Assert(err, check.IsNil)
390                 c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
391                 buf, err := ioutil.ReadAll(resp.Body)
392                 c.Assert(err, check.IsNil)
393                 c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
394         }
395 }
396
// TestS3CollectionList lists a collection bucket at several page sizes
// and prefixes, both with and without fake "directory" marker objects.
func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)

	var markers int
	// The range index (0 or 1) doubles as the number of directory
	// marker objects each directory contributes when S3FolderObjects
	// is false or true, respectively.
	for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
		dirs := 2
		filesPerDir := 1001
		stage.writeBigDirs(c, dirs, filesPerDir)
		// Total # objects is:
		//                 2 file entries from s3setup (emptyfile and sailboat.txt)
		//                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
		//             +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled)
		// +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
		s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
		s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
		s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
	}
}
// testS3List pages through the bucket with the given page size and key
// prefix, checking that exactly expectFiles distinct keys come back,
// that no page exceeds the effective page limit, and that pagination
// terminates with consistent IsTruncated/NextMarker values.
func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
	c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects)
	// Responses are expected to be capped at 1000 keys per page even
	// when a larger page size is requested.
	expectPageSize := pageSize
	if expectPageSize > 1000 {
		expectPageSize = 1000
	}
	gotKeys := map[string]s3.Key{}
	nextMarker := ""
	pages := 0
	for {
		resp, err := bucket.List(prefix, "", nextMarker, pageSize)
		if !c.Check(err, check.IsNil) {
			break
		}
		c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
		// Guard against an endless pagination loop: the server
		// should never need more pages than this.
		if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
			break
		}
		for _, key := range resp.Contents {
			gotKeys[key.Key] = key
			// sailboat.txt content is "⛵\n" — 4 bytes of UTF-8.
			if strings.Contains(key.Key, "sailboat.txt") {
				c.Check(key.Size, check.Equals, int64(4))
			}
		}
		if !resp.IsTruncated {
			// Final page: no continuation marker expected.
			c.Check(resp.NextMarker, check.Equals, "")
			break
		}
		if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
			break
		}
		nextMarker = resp.NextMarker
	}
	c.Check(len(gotKeys), check.Equals, expectFiles)
}
451
452 func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
453         for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
454                 s.testS3CollectionListRollup(c)
455         }
456 }
457
// testS3CollectionListRollup exercises List with combinations of
// prefix, delimiter, and marker. It first retrieves the full flat key
// list, then for each trial computes the expected keys, common
// prefixes, truncation flag, and next marker locally, and compares
// them against the server's response.
func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)

	dirs := 2
	filesPerDir := 500
	stage.writeBigDirs(c, dirs, filesPerDir)
	// Add a zero-length file whose name has no "/" and sorts before
	// "dir0/..." ("din" < "dir").
	err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
	c.Assert(err, check.IsNil)
	// Collect the complete key list via pagination, skipping
	// consecutive duplicates.
	var allfiles []string
	for marker := ""; ; {
		resp, err := stage.collbucket.List("", "", marker, 20000)
		c.Check(err, check.IsNil)
		for _, key := range resp.Contents {
			if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
				allfiles = append(allfiles, key.Key)
			}
		}
		marker = resp.NextMarker
		if marker == "" {
			break
		}
	}
	// markers is 1 when each directory also appears as a fake "dir/"
	// object, 0 otherwise.
	markers := 0
	if s.testServer.Config.cluster.Collections.S3FolderObjects {
		markers = 1
	}
	c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)

	// When folder objects are enabled, every directory marker must
	// appear in the listing before the files inside that directory.
	gotDirMarker := map[string]bool{}
	for _, name := range allfiles {
		isDirMarker := strings.HasSuffix(name, "/")
		if markers == 0 {
			c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
		} else if isDirMarker {
			gotDirMarker[name] = true
		} else if i := strings.LastIndex(name, "/"); i >= 0 {
			c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
			gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
		}
	}

	for _, trial := range []struct {
		prefix    string
		delimiter string
		marker    string
	}{
		{"", "", ""},
		{"di", "/", ""},
		{"di", "r", ""},
		{"di", "n", ""},
		{"dir0", "/", ""},
		{"dir0/", "/", ""},
		{"dir0/f", "/", ""},
		{"dir0", "", ""},
		{"dir0/", "", ""},
		{"dir0/f", "", ""},
		{"dir0", "/", "dir0/file14.txt"},       // no commonprefixes
		{"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
		{"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
		{"", "", "dir1/file498.txt"},           // last page of results
		{"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
		{"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
		{"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
		{"dir2", "/", ""},                      // prefix "dir2" does not exist
		{"", "/", ""},
	} {
		c.Logf("\n\n=== trial %+v markers=%d", trial, markers)

		maxKeys := 20
		resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
		c.Check(err, check.IsNil)
		if resp.IsTruncated && trial.delimiter == "" {
			// goamz List method fills in the missing
			// NextMarker field if resp.IsTruncated, so
			// now we can't really tell whether it was
			// sent by the server or by goamz. In cases
			// where it should be empty but isn't, assume
			// it's goamz's fault.
			resp.NextMarker = ""
		}

		// Compute the expected response from allfiles, emulating
		// S3 prefix/delimiter rollup within a maxKeys page limit.
		var expectKeys []string
		var expectPrefixes []string
		var expectNextMarker string
		var expectTruncated bool
		for _, key := range allfiles {
			// full: the page quota (keys + rolled-up prefixes)
			// is already used up.
			full := len(expectKeys)+len(expectPrefixes) >= maxKeys
			if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
				continue
			} else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
				// Key rolls up into a common prefix ending at
				// the first delimiter after trial.prefix.
				prefix := key[:len(trial.prefix)+idx+1]
				if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
					// same prefix as previous key
				} else if full {
					expectNextMarker = key
					expectTruncated = true
				} else {
					expectPrefixes = append(expectPrefixes, prefix)
				}
			} else if full {
				// Plain key past the page limit: expect
				// truncation (NextMarker only sent when a
				// delimiter is in play).
				if trial.delimiter != "" {
					expectNextMarker = key
				}
				expectTruncated = true
				break
			} else {
				expectKeys = append(expectKeys, key)
			}
		}

		var gotKeys []string
		for _, key := range resp.Contents {
			gotKeys = append(gotKeys, key.Key)
		}
		var gotPrefixes []string
		for _, prefix := range resp.CommonPrefixes {
			gotPrefixes = append(gotPrefixes, prefix)
		}
		commentf := check.Commentf("trial %+v markers=%d", trial, markers)
		c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
		c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
		c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
		c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
		c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
	}
}