Merge branch '19088-s3-properties-tags'
author Tom Clegg <tom@curii.com>
Thu, 30 Jun 2022 20:03:52 +0000 (16:03 -0400)
committer Tom Clegg <tom@curii.com>
Thu, 30 Jun 2022 20:03:52 +0000 (16:03 -0400)
closes #19088

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

doc/api/keep-s3.html.textile.liquid
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_users.go
services/keep-web/s3.go
services/keep-web/s3_test.go

index bee91516bc12fc61e87a51b603361372ad64e358..2e0c90ef47617e7aabc1d01928c4d7496842654d 100644 (file)
@@ -70,6 +70,22 @@ h4. GetBucketVersioning
 
 Bucket versioning is presently not supported, so this will always respond that bucket versioning is not enabled.
 
+h3. Accessing collection/project properties as metadata
+
+GetObject, HeadObject, and HeadBucket return Arvados object properties as S3 metadata headers, e.g., @X-Amz-Meta-Foo: bar@.
+
+If the requested path indicates a file or directory placeholder inside a collection, or the top level of a collection, GetObject and HeadObject return the collection properties.
+
+If the requested path indicates a directory placeholder corresponding to a project, GetObject and HeadObject return the properties of the project.
+
+HeadBucket returns the properties of the collection or project corresponding to the bucket name.
+
+Non-string property values are returned in a JSON representation, e.g., @["foo","bar"]@. Properties whose keys are not valid HTTP header names (e.g., keys containing spaces) are omitted.
+
+As in Amazon S3, property values containing non-ASCII characters are returned in BASE64-encoded form as described in RFC 2047, e.g., @=?UTF-8?b?4pu1?=@.
+
+It is not possible to modify collection or project properties using the S3 API.
+
 h3. Authorization mechanisms
 
 Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature.
index ce9253ab3d4f5d5447273cfe02edca716afc52fd..2ad4d1f859f1141035c04cb4180c5ef623d1fa04 100644 (file)
@@ -234,6 +234,14 @@ type fileinfo struct {
        mode    os.FileMode
        size    int64
        modTime time.Time
+       // If not nil, sys() returns the source data structure: a
+       // *Collection, a *Group, an error (if loading failed), or nil.
+       // Currently populated only for project dirs and top-level
+       // collection dirs. Does not stay up to date with upstream changes.
+       //
+       // Intended to support keep-web's properties-as-s3-metadata
+       // feature (https://dev.arvados.org/issues/19088).
+       sys func() interface{}
 }
 
 // Name implements os.FileInfo.
@@ -261,9 +269,12 @@ func (fi fileinfo) Size() int64 {
        return fi.size
 }
 
-// Sys implements os.FileInfo.
+// Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-       return nil
+       if fi.sys == nil {
+               return nil
+       }
+       return fi.sys()
 }
 
 type nullnode struct{}
index ccfbdc4da262c13ee3d319ad072f73a10b9b1d0a..26012e240603d0be43a1019346c4e946e2821790 100644 (file)
@@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
                                name:    ".",
                                mode:    os.ModeDir | 0755,
                                modTime: modTime,
+                               sys:     func() interface{} { return c },
                        },
                        inodes: make(map[string]inode),
                },
index 66a126a39c12a45620a93c59a9047ac3d4ae1fe8..1dfa2df6e4005f0b6d93f497a657e81e583bad14 100644 (file)
@@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        name:    coll.Name,
                        modTime: modTime,
                        mode:    0755 | os.ModeDir,
+                       sys:     func() interface{} { return &coll },
                },
        }
        return &deferrednode{wrapped: placeholder, create: func() inode {
index 380fb9c6d5f2f8dac636b06c90df14973c0adb36..bea1f76e24f24faffa38fbccd7b6b880ffb2d22e 100644 (file)
@@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
                                {"uuid", "is_a", []string{"arvados#collection", "arvados#group"}},
                                {"groups.group_class", "=", "project"},
                        },
+                       Select: []string{"uuid", "name", "modified_at", "properties"},
                })
                if err != nil {
                        return nil, err
@@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
        if strings.Contains(coll.UUID, "-j7d0g-") {
                // Group item was loaded into a Collection var -- but
                // we only need the Name and UUID anyway, so it's OK.
-               return fs.newProjectNode(parent, coll.Name, coll.UUID), nil
+               return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil
        } else if strings.Contains(coll.UUID, "-4zz18-") {
                return deferredCollectionFS(fs, parent, coll), nil
        } else {
@@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                        Count:   "none",
                        Filters: filters,
                        Order:   "uuid",
+                       Select:  []string{"uuid", "name", "modified_at", "properties"},
                }
 
                for {
@@ -121,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                                        continue
                                }
                                if strings.Contains(i.UUID, "-j7d0g-") {
-                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID))
+                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{
+                                               UUID:       i.UUID,
+                                               Name:       i.Name,
+                                               ModifiedAt: i.ModifiedAt,
+                                               Properties: i.Properties,
+                                       }))
                                } else if strings.Contains(i.UUID, "-4zz18-") {
                                        inodes = append(inodes, deferredCollectionFS(fs, parent, i))
                                } else {
index 3892be1e9a97610522a1fc219d5c0fb807788e4c..bb2eee77925fd2c682c7d42e1e8e175c4f2f1489 100644 (file)
@@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) {
        fs.root.treenode.Lock()
        defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
-               return fs.newProjectNode(fs.root, mount, uuid), nil
+               return fs.newProjectNode(fs.root, mount, uuid, nil), nil
        })
 }
 
@@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode {
        if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) {
                return fs.mountCollection(parent, id)
        } else if strings.Contains(id, "-j7d0g-") {
-               return fs.newProjectNode(fs.root, id, id)
+               return fs.newProjectNode(fs.root, id, id, nil)
        } else {
                return nil
        }
@@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode {
+       var projLoading sync.Mutex
        return &lookupnode{
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,6 +175,20 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
                                name:    name,
                                modTime: time.Now(),
                                mode:    0755 | os.ModeDir,
+                               sys: func() interface{} {
+                                       projLoading.Lock()
+                                       defer projLoading.Unlock()
+                                       if proj != nil {
+                                               return proj
+                                       }
+                                       var g Group
+                                       err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+                                       if err != nil {
+                                               return err
+                                       }
+                                       proj = &g
+                                       return proj
+                               },
                        },
                },
        }
index 00f70369694430f70f0cca185270ceb905e34c01..ae47414b7abe80b9c0e2a2ff0a5e7c36d7320dd2 100644 (file)
@@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro
                return nil, os.ErrNotExist
        }
        user := resp.Items[0]
-       return fs.newProjectNode(parent, user.Username, user.UUID), nil
+       return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil
 }
 
 func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
@@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
                        if user.Username == "" {
                                continue
                        }
-                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID))
+                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil))
                }
                params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}}
        }
index 59ab3cd4389c0cc57f9a923983ee58e12bca5184..90b75f8a306019c2b646d15228da0c1c54a62956 100644 (file)
@@ -8,12 +8,15 @@ import (
        "crypto/hmac"
        "crypto/sha256"
        "encoding/base64"
+       "encoding/json"
        "encoding/xml"
        "errors"
        "fmt"
        "hash"
        "io"
+       "mime"
        "net/http"
+       "net/textproto"
        "net/url"
        "os"
        "path/filepath"
@@ -385,6 +388,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                        if err == nil && fi.IsDir() {
+                               err = setFileInfoHeaders(w.Header(), fs, fspath)
+                               if err != nil {
+                                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                                       return true
+                               }
                                w.WriteHeader(http.StatusOK)
                        } else if os.IsNotExist(err) {
                                s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -394,6 +402,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
+                       err = setFileInfoHeaders(w.Header(), fs, fspath)
+                       if err != nil {
+                               s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                               return true
+                       }
                        w.Header().Set("Content-Type", "application/x-directory")
                        w.WriteHeader(http.StatusOK)
                        return true
@@ -415,6 +428,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                // shallow copy r, and change URL path
                r := *r
                r.URL.Path = fspath
+               err = setFileInfoHeaders(w.Header(), fs, fspath)
+               if err != nil {
+                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                       return true
+               }
                http.FileServer(fs).ServeHTTP(w, &r)
                return true
        case r.Method == http.MethodPut:
@@ -586,6 +604,60 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        }
 }
 
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
+       maybeEncode := func(s string) string {
+               for _, c := range s {
+                       if c > '\u007f' {
+                               return mime.BEncoding.Encode("UTF-8", s)
+                       }
+               }
+               return s
+       }
+       path = strings.TrimSuffix(path, "/")
+       var props map[string]interface{}
+       for {
+               fi, err := fs.Stat(path)
+               if err != nil {
+                       return err
+               }
+               switch src := fi.Sys().(type) {
+               case *arvados.Collection:
+                       props = src.Properties
+               case *arvados.Group:
+                       props = src.Properties
+               default:
+                       if err, ok := src.(error); ok {
+                               return err
+                       }
+                       // Try parent
+                       cut := strings.LastIndexByte(path, '/')
+                       if cut < 0 {
+                               return nil
+                       }
+                       path = path[:cut]
+                       continue
+               }
+               break
+       }
+       for k, v := range props {
+               if !validMIMEHeaderKey(k) {
+                       continue
+               }
+               k = "x-amz-meta-" + k
+               if s, ok := v.(string); ok {
+                       header.Set(k, maybeEncode(s))
+               } else if j, err := json.Marshal(v); err == nil {
+                       header.Set(k, maybeEncode(string(j)))
+               }
+       }
+       return nil
+}
+
+func validMIMEHeaderKey(k string) bool {
+       check := "z-" + k
+       return check != textproto.CanonicalMIMEHeaderKey(check)
+}
+
 // Call fn on the given path (directory) and its contents, in
 // lexicographic order.
 //
index 261ebb5741388a87a618d7168936e4292052a6c3..a99f3c278f6214b5764f853920c10539f7757ffe 100644 (file)
@@ -11,6 +11,7 @@ import (
        "crypto/sha256"
        "fmt"
        "io/ioutil"
+       "mime"
        "net/http"
        "net/http/httptest"
        "net/url"
@@ -39,12 +40,13 @@ type s3stage struct {
        kc         *keepclient.KeepClient
        proj       arvados.Group
        projbucket *s3.Bucket
+       subproj    arvados.Group
        coll       arvados.Collection
        collbucket *s3.Bucket
 }
 
 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
-       var proj arvados.Group
+       var proj, subproj arvados.Group
        var coll arvados.Collection
        arv := arvados.NewClientFromEnv()
        arv.AuthToken = arvadostest.ActiveToken
@@ -52,14 +54,35 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                "group": map[string]interface{}{
                        "group_class": "project",
                        "name":        "keep-web s3 test",
+                       "properties": map[string]interface{}{
+                               "project-properties-key": "project properties value",
+                       },
                },
                "ensure_unique_name": true,
        })
        c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+               "group": map[string]interface{}{
+                       "owner_uuid":  proj.UUID,
+                       "group_class": "project",
+                       "name":        "keep-web s3 test subproject",
+                       "properties": map[string]interface{}{
+                               "subproject_properties_key": "subproject properties value",
+                               "invalid header key":        "this value will not be returned because key contains spaces",
+                       },
+               },
+       })
+       c.Assert(err, check.IsNil)
        err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
                "owner_uuid":    proj.UUID,
                "name":          "keep-web s3 test collection",
                "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+               "properties": map[string]interface{}{
+                       "string":   "string value",
+                       "array":    []string{"element1", "element2"},
+                       "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
+                       "nonascii": "⛵",
+               },
        }})
        c.Assert(err, check.IsNil)
        ac, err := arvadosclient.New(arv)
@@ -95,7 +118,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                        S3:   client,
                        Name: proj.UUID,
                },
-               coll: coll,
+               subproj: subproj,
+               coll:    coll,
                collbucket: &s3.Bucket{
                        S3:   client,
                        Name: coll.UUID,
@@ -215,6 +239,73 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
        c.Check(exists, check.Equals, true)
 }
 
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
+       got := map[string]string{}
+       for hk, hv := range hdr {
+               if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
+                       got[k] = hv[0]
+               }
+       }
+       c.Check(got, check.DeepEquals, expect)
+}
+
+func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       expectCollectionTags := map[string]string{
+               "String":   "string value",
+               "Array":    `["element1","element2"]`,
+               "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
+               "Nonascii": "=?UTF-8?b?4pu1?=",
+       }
+       expectSubprojectTags := map[string]string{
+               "Subproject_properties_key": "subproject properties value",
+       }
+       expectProjectTags := map[string]string{
+               "Project-Properties-Key": "project properties value",
+       }
+
+       c.Log("HEAD object with metadata from collection")
+       resp, err := stage.collbucket.Head("sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("GET object with metadata from collection")
+       rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+       c.Assert(err, check.IsNil)
+       content, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       rdr.Close()
+       c.Check(content, check.HasLen, 4)
+       s.checkMetaEquals(c, hdr, expectCollectionTags)
+
+       c.Log("HEAD bucket with metadata from collection")
+       resp, err = stage.collbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD file with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from subproject")
+       resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+       c.Log("HEAD bucket with metadata from project")
+       resp, err = stage.projbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectProjectTags)
+}
+
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
        stage := s.s3setup(c)
        defer stage.teardown(c)