16535: Add HeadBucket API.
[arvados.git] / services / keep-web / s3_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "bytes"
9         "crypto/rand"
10         "fmt"
11         "io/ioutil"
12         "net/http"
13         "os"
14         "strings"
15         "sync"
16         "time"
17
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
20         "git.arvados.org/arvados.git/sdk/go/arvadostest"
21         "git.arvados.org/arvados.git/sdk/go/keepclient"
22         "github.com/AdRoll/goamz/aws"
23         "github.com/AdRoll/goamz/s3"
24         check "gopkg.in/check.v1"
25 )
26
// s3stage holds the clients and fixture objects used by the S3 API
// tests: a freshly created project, a collection inside it, and goamz
// buckets addressing each by UUID. Create one with
// (*IntegrationSuite).s3setup and release it with teardown.
type s3stage struct {
	arv        *arvados.Client              // API client authenticated as the active test user
	ac         *arvadosclient.ArvadosClient // legacy API client (used to build kc)
	kc         *keepclient.KeepClient       // Keep client for collection file I/O
	proj       arvados.Group                // test project created by s3setup
	projbucket *s3.Bucket                   // bucket addressing proj by UUID
	coll       arvados.Collection           // test collection created inside proj
	collbucket *s3.Bucket                   // bucket addressing coll by UUID
}
36
// s3setup creates the test fixture: a project owned by the active
// test user, and a collection inside it containing a zero-length
// "emptyfile", an empty directory "emptydir", and a non-empty
// "sailboat.txt". It returns an s3stage whose goamz buckets point at
// the keep-web test server, using the active user's v2 token as both
// S3 access key and secret.
//
// The caller is responsible for calling teardown on the returned
// stage.
func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
	var proj arvados.Group
	var coll arvados.Collection
	arv := arvados.NewClientFromEnv()
	arv.AuthToken = arvadostest.ActiveToken
	err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
		"group": map[string]interface{}{
			"group_class": "project",
			"name":        "keep-web s3 test",
		},
		"ensure_unique_name": true,
	})
	c.Assert(err, check.IsNil)
	// The manifest creates "emptyfile" (zero bytes) and the empty
	// directory "emptydir" (d41d8... is the MD5 of the empty blob).
	err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
		"owner_uuid":    proj.UUID,
		"name":          "keep-web s3 test collection",
		"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
	}})
	c.Assert(err, check.IsNil)
	ac, err := arvadosclient.New(arv)
	c.Assert(err, check.IsNil)
	kc, err := keepclient.MakeKeepClient(ac)
	c.Assert(err, check.IsNil)
	// Write a non-empty (non-ASCII) file through the collection
	// filesystem, sync it, then re-fetch the collection record so
	// coll reflects the updated contents.
	fs, err := coll.FileSystem(arv, kc)
	c.Assert(err, check.IsNil)
	f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
	c.Assert(err, check.IsNil)
	_, err = f.Write([]byte("⛵\n"))
	c.Assert(err, check.IsNil)
	err = f.Close()
	c.Assert(err, check.IsNil)
	err = fs.Sync()
	c.Assert(err, check.IsNil)
	err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
	c.Assert(err, check.IsNil)

	// keep-web's S3 endpoint accepts the Arvados token as both the
	// access key and the secret key.
	auth := aws.NewAuth(arvadostest.ActiveTokenV2, arvadostest.ActiveTokenV2, "", time.Now().Add(time.Hour))
	region := aws.Region{
		Name:       s.testServer.Addr,
		S3Endpoint: "http://" + s.testServer.Addr,
	}
	client := s3.New(*auth, region)
	return s3stage{
		arv:  arv,
		ac:   ac,
		kc:   kc,
		proj: proj,
		projbucket: &s3.Bucket{
			S3:   client,
			Name: proj.UUID,
		},
		coll: coll,
		collbucket: &s3.Bucket{
			S3:   client,
			Name: coll.UUID,
		},
	}
}
95
96 func (stage s3stage) teardown(c *check.C) {
97         if stage.coll.UUID != "" {
98                 err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
99                 c.Check(err, check.IsNil)
100         }
101         if stage.proj.UUID != "" {
102                 err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
103                 c.Check(err, check.IsNil)
104         }
105 }
106
// TestS3CollectionGetObject exercises GET object on a collection
// bucket (object keys are paths within the collection).
func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3GetObject(c, stage.collbucket, "")
}
// TestS3ProjectGetObject exercises GET object on a project bucket
// (object keys are prefixed with the collection name).
func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
}
117 func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
118         rdr, err := bucket.GetReader(prefix + "emptyfile")
119         c.Assert(err, check.IsNil)
120         buf, err := ioutil.ReadAll(rdr)
121         c.Check(err, check.IsNil)
122         c.Check(len(buf), check.Equals, 0)
123         err = rdr.Close()
124         c.Check(err, check.IsNil)
125
126         rdr, err = bucket.GetReader(prefix + "missingfile")
127         c.Check(err, check.ErrorMatches, `404 Not Found`)
128
129         rdr, err = bucket.GetReader(prefix + "sailboat.txt")
130         c.Assert(err, check.IsNil)
131         buf, err = ioutil.ReadAll(rdr)
132         c.Check(err, check.IsNil)
133         c.Check(buf, check.DeepEquals, []byte("⛵\n"))
134         err = rdr.Close()
135         c.Check(err, check.IsNil)
136 }
137
// TestS3CollectionPutObjectSuccess exercises successful PUTs on a
// collection bucket.
func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectSuccess(c, stage.collbucket, "")
}
// TestS3ProjectPutObjectSuccess exercises successful PUTs on a
// project bucket (keys prefixed with the collection name).
func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/")
}
148 func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) {
149         for _, trial := range []struct {
150                 path string
151                 size int
152         }{
153                 {
154                         path: "newfile",
155                         size: 128000000,
156                 }, {
157                         path: "newdir/newfile",
158                         size: 1 << 26,
159                 }, {
160                         path: "newdir1/newdir2/newfile",
161                         size: 0,
162                 },
163         } {
164                 c.Logf("=== %v", trial)
165
166                 objname := prefix + trial.path
167
168                 _, err := bucket.GetReader(objname)
169                 c.Assert(err, check.ErrorMatches, `404 Not Found`)
170
171                 buf := make([]byte, trial.size)
172                 rand.Read(buf)
173
174                 err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
175                 c.Check(err, check.IsNil)
176
177                 rdr, err := bucket.GetReader(objname)
178                 if !c.Check(err, check.IsNil) {
179                         continue
180                 }
181                 buf2, err := ioutil.ReadAll(rdr)
182                 c.Check(err, check.IsNil)
183                 c.Check(buf2, check.HasLen, len(buf))
184                 c.Check(bytes.Equal(buf, buf2), check.Equals, true)
185         }
186 }
187
// TestS3CollectionPutObjectFailure exercises PUTs that must fail on a
// collection bucket.
func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectFailure(c, stage.collbucket, "")
}
// TestS3ProjectPutObjectFailure exercises PUTs that must fail on a
// project bucket (keys prefixed with the collection name).
func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)
	s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
}
// testS3PutObjectFailure PUTs a set of invalid object names
// concurrently, expecting each to fail with a 400 response, and then
// confirms that no object was created under that name. Invalid cases
// include paths through an existing file, names colliding with
// existing directories, trailing slashes, doubled slashes, and the
// empty name.
func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
	var wg sync.WaitGroup
	for _, trial := range []struct {
		path string
	}{
		{
			path: "emptyfile/newname", // emptyfile exists, see s3setup()
		}, {
			path: "emptyfile/", // emptyfile exists, see s3setup()
		}, {
			path: "emptydir", // dir already exists, see s3setup()
		}, {
			path: "emptydir/",
		}, {
			path: "emptydir//",
		}, {
			path: "newdir/",
		}, {
			path: "newdir//",
		}, {
			path: "/",
		}, {
			path: "//",
		}, {
			path: "foo//bar",
		}, {
			path: "",
		},
	} {
		trial := trial // capture loop variable for the goroutine below
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.Logf("=== %v", trial)

			objname := prefix + trial.path

			buf := make([]byte, 1234)
			rand.Read(buf)

			err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
			if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) {
				return
			}

			// "" and "/" resolve to the bucket root, so a GET on
			// them would not be a meaningful 404 check.
			if objname != "" && objname != "/" {
				_, err = bucket.GetReader(objname)
				c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname))
			}
		}()
	}
	wg.Wait()
}
251
252 func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
253         fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
254         c.Assert(err, check.IsNil)
255         for d := 0; d < dirs; d++ {
256                 dir := fmt.Sprintf("dir%d", d)
257                 c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
258                 for i := 0; i < filesPerDir; i++ {
259                         f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
260                         c.Assert(err, check.IsNil)
261                         c.Assert(f.Close(), check.IsNil)
262                 }
263         }
264         c.Assert(fs.Sync(), check.IsNil)
265 }
266
267 func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
268         stage := s.s3setup(c)
269         defer stage.teardown(c)
270         for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
271                 req, err := http.NewRequest("GET", bucket.URL("/"), nil)
272                 req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
273                 req.URL.RawQuery = "versioning"
274                 resp, err := http.DefaultClient.Do(req)
275                 c.Assert(err, check.IsNil)
276                 buf, err := ioutil.ReadAll(resp.Body)
277                 c.Assert(err, check.IsNil)
278                 c.Check(strings.TrimSpace(string(buf)), check.Equals, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
279         }
280 }
281
// TestS3CollectionList lists a collection containing >1000 files per
// directory, exercising pagination at several page sizes (including
// one above the 1000-per-page server cap) and with a directory
// prefix. The collection also contains emptyfile and sailboat.txt
// from s3setup, hence the "+2".
func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)

	filesPerDir := 1001
	stage.writeBigDirs(c, 2, filesPerDir)
	s.testS3List(c, stage.collbucket, "", 4000, 2+filesPerDir*2)
	s.testS3List(c, stage.collbucket, "", 131, 2+filesPerDir*2)
	s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir)
}
// testS3List pages through a bucket listing with the given prefix and
// page size, checking that each page respects the (server-capped)
// page size, that IsTruncated/NextMarker are consistent, and that
// exactly expectFiles distinct keys are returned overall.
func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
	// Server caps pages at 1000 keys regardless of requested size.
	expectPageSize := pageSize
	if expectPageSize > 1000 {
		expectPageSize = 1000
	}
	gotKeys := map[string]s3.Key{}
	nextMarker := ""
	pages := 0
	for {
		resp, err := bucket.List(prefix, "", nextMarker, pageSize)
		if !c.Check(err, check.IsNil) {
			break
		}
		c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
		// Guard against an endless pagination loop: we should never
		// need more pages than expectFiles/expectPageSize + 1.
		if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
			break
		}
		for _, key := range resp.Contents {
			gotKeys[key.Key] = key
		}
		if !resp.IsTruncated {
			// Final page must not advertise a next marker.
			c.Check(resp.NextMarker, check.Equals, "")
			break
		}
		// Truncated page must provide a non-empty next marker.
		if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
			break
		}
		nextMarker = resp.NextMarker
	}
	c.Check(len(gotKeys), check.Equals, expectFiles)
}
323
// TestS3CollectionListRollup checks delimiter/prefix/marker handling
// in bucket listings (CommonPrefixes "rollup"). It first collects the
// full flat key list, then, for each trial, compares the server's
// paged response against expectations computed locally from that flat
// list.
func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
	stage := s.s3setup(c)
	defer stage.teardown(c)

	dirs := 2
	filesPerDir := 500
	stage.writeBigDirs(c, dirs, filesPerDir)
	// "dingbats" sorts between "dir0/..." and "dir1/...", giving the
	// delimiter trials a non-directory key to handle.
	err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
	c.Assert(err, check.IsNil)
	// Build the reference list of all keys, in listing order,
	// de-duplicating consecutive repeats across page boundaries.
	var allfiles []string
	for marker := ""; ; {
		resp, err := stage.collbucket.List("", "", marker, 20000)
		c.Check(err, check.IsNil)
		for _, key := range resp.Contents {
			if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
				allfiles = append(allfiles, key.Key)
			}
		}
		marker = resp.NextMarker
		if marker == "" {
			break
		}
	}
	// +3 = emptyfile, sailboat.txt (from s3setup) and dingbats.
	c.Check(allfiles, check.HasLen, dirs*filesPerDir+3)

	for _, trial := range []struct {
		prefix    string
		delimiter string
		marker    string
	}{
		{"di", "/", ""},
		{"di", "r", ""},
		{"di", "n", ""},
		{"dir0", "/", ""},
		{"dir0", "/", "dir0/file14.txt"},       // no commonprefixes
		{"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
		{"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
		{"", "", "dir1/file498.txt"},           // last page of results
		{"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
		{"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
		{"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
		{"dir2", "/", ""},                      // prefix "dir2" does not exist
		{"", "/", ""},
	} {
		c.Logf("\n\n=== trial %+v", trial)

		maxKeys := 20
		resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
		c.Check(err, check.IsNil)
		if resp.IsTruncated && trial.delimiter == "" {
			// goamz List method fills in the missing
			// NextMarker field if resp.IsTruncated, so
			// now we can't really tell whether it was
			// sent by the server or by goamz. In cases
			// where it should be empty but isn't, assume
			// it's goamz's fault.
			resp.NextMarker = ""
		}

		// Compute the expected keys, rolled-up common prefixes,
		// truncation flag, and next marker by walking the flat
		// reference list with the same prefix/delimiter/marker rules
		// the server should apply.
		var expectKeys []string
		var expectPrefixes []string
		var expectNextMarker string
		var expectTruncated bool
		for _, key := range allfiles {
			// full: the page limit has been reached.
			full := len(expectKeys)+len(expectPrefixes) >= maxKeys
			if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
				continue
			} else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
				// Key contains the delimiter after the prefix: it
				// rolls up into a common prefix.
				prefix := key[:len(trial.prefix)+idx+1]
				if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
					// same prefix as previous key
				} else if full {
					expectNextMarker = key
					expectTruncated = true
				} else {
					expectPrefixes = append(expectPrefixes, prefix)
				}
			} else if full {
				if trial.delimiter != "" {
					expectNextMarker = key
				}
				expectTruncated = true
				break
			} else {
				expectKeys = append(expectKeys, key)
			}
		}

		var gotKeys []string
		for _, key := range resp.Contents {
			gotKeys = append(gotKeys, key.Key)
		}
		var gotPrefixes []string
		for _, prefix := range resp.CommonPrefixes {
			gotPrefixes = append(gotPrefixes, prefix)
		}
		c.Check(gotKeys, check.DeepEquals, expectKeys)
		c.Check(gotPrefixes, check.DeepEquals, expectPrefixes)
		c.Check(resp.NextMarker, check.Equals, expectNextMarker)
		c.Check(resp.IsTruncated, check.Equals, expectTruncated)
		c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
	}
}