From: Tom Clegg Date: Thu, 30 Jun 2022 20:03:52 +0000 (-0400) Subject: Merge branch '19088-s3-properties-tags' X-Git-Tag: 2.5.0~126 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/607033c33f2001c194fe8c68d0dc17e4bde849da?hp=bdcf09e34f8eec88e1e326094ac60b5d484844e1 Merge branch '19088-s3-properties-tags' closes #19088 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/doc/api/keep-s3.html.textile.liquid b/doc/api/keep-s3.html.textile.liquid index bee91516bc..2e0c90ef47 100644 --- a/doc/api/keep-s3.html.textile.liquid +++ b/doc/api/keep-s3.html.textile.liquid @@ -70,6 +70,22 @@ h4. GetBucketVersioning Bucket versioning is presently not supported, so this will always respond that bucket versioning is not enabled. +h3. Accessing collection/project properties as metadata + +GetObject, HeadObject, and HeadBucket return Arvados object properties as S3 metadata headers, e.g., @X-Amz-Meta-Foo: bar@. + +If the requested path indicates a file or directory placeholder inside a collection, or the top level of a collection, GetObject and HeadObject return the collection properties. + +If the requested path indicates a directory placeholder corresponding to a project, GetObject and HeadObject return the properties of the project. + +HeadBucket returns the properties of the collection or project corresponding to the bucket name. + +Non-string property values are returned in a JSON representation, e.g., @["foo","bar"]@. + +As in Amazon S3, property values containing non-ASCII characters are returned in BASE64-encoded form as described in RFC 2047, e.g., @=?UTF-8?b?4pu1?=@. + +It is not possible to modify collection or project properties using the S3 API. + h3. Authorization mechanisms Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature. diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index ce9253ab3d..2ad4d1f859 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -234,6 +234,14 @@ type fileinfo struct { mode os.FileMode size int64 modTime time.Time + // If not nil, sys() returns the source data structure, which + // can be a *Collection, *Group, or nil. Currently populated + // only for project dirs and top-level collection dirs. Does + // not stay up to date with upstream changes. + // + // Intended to support keep-web's properties-as-s3-metadata + // feature (https://dev.arvados.org/issues/19088). + sys func() interface{} } // Name implements os.FileInfo. @@ -261,9 +269,12 @@ func (fi fileinfo) Size() int64 { return fi.size } -// Sys implements os.FileInfo. +// Sys implements os.FileInfo. See comment in fileinfo struct. func (fi fileinfo) Sys() interface{} { - return nil + if fi.sys == nil { + return nil + } + return fi.sys() } type nullnode struct{} diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index ccfbdc4da2..26012e2406 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile name: ".", mode: os.ModeDir | 0755, modTime: modTime, + sys: func() interface{} { return c }, }, inodes: make(map[string]inode), }, diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index 66a126a39c..1dfa2df6e4 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode { name: coll.Name, modTime: modTime, mode: 0755 | os.ModeDir, + sys: func() interface{} { return &coll }, }, } return &deferrednode{wrapped: placeholder, create: func() inode { diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go index 380fb9c6d5..bea1f76e24 100644 --- a/sdk/go/arvados/fs_project.go +++ b/sdk/go/arvados/fs_project.go @@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in {"uuid", "is_a", []string{"arvados#collection", "arvados#group"}}, {"groups.group_class", "=", "project"}, }, + Select: []string{"uuid", "name", "modified_at", "properties"}, }) if err != nil { return nil, err @@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in if strings.Contains(coll.UUID, "-j7d0g-") { // Group item was loaded into a Collection var -- but // we only need the Name and UUID anyway, so it's OK. - return fs.newProjectNode(parent, coll.Name, coll.UUID), nil + return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil } else if strings.Contains(coll.UUID, "-4zz18-") { return deferredCollectionFS(fs, parent, coll), nil } else { @@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, Count: "none", Filters: filters, Order: "uuid", + Select: []string{"uuid", "name", "modified_at", "properties"}, } for { @@ -121,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, continue } if strings.Contains(i.UUID, "-j7d0g-") { - inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID)) + inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{ + UUID: i.UUID, + Name: i.Name, + ModifiedAt: i.ModifiedAt, + Properties: i.Properties, + })) } else if strings.Contains(i.UUID, "-4zz18-") { inodes = append(inodes, deferredCollectionFS(fs, parent, i)) } else { diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 3892be1e9a..bb2eee7792 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) { fs.root.treenode.Lock() defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { - return fs.newProjectNode(fs.root, mount, uuid), nil + return fs.newProjectNode(fs.root, mount, uuid, nil), nil }) } @@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode { if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) { return fs.mountCollection(parent, id) } else if strings.Contains(id, "-j7d0g-") { - return fs.newProjectNode(fs.root, id, id) + return fs.newProjectNode(fs.root, id, id, nil) } else { return nil } @@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode { return cfs } -func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode { +func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode { + var projLoading sync.Mutex return &lookupnode{ stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, @@ -174,6 +175,20 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode name: name, modTime: time.Now(), mode: 0755 | os.ModeDir, + sys: func() interface{} { + projLoading.Lock() + defer projLoading.Unlock() + if proj != nil { + return proj + } + var g Group + err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil) + if err != nil { + return err + } + proj = &g + return proj + }, }, }, } diff --git a/sdk/go/arvados/fs_users.go b/sdk/go/arvados/fs_users.go index 00f7036969..ae47414b7a 100644 --- a/sdk/go/arvados/fs_users.go +++ b/sdk/go/arvados/fs_users.go @@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro return nil, os.ErrNotExist } user := resp.Items[0] - return fs.newProjectNode(parent, user.Username, user.UUID), nil + return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil } func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { @@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { if user.Username == "" { continue } - inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID)) + inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil)) } params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}} } diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go index 59ab3cd438..90b75f8a30 100644 --- a/services/keep-web/s3.go +++ b/services/keep-web/s3.go @@ -8,12 +8,15 @@ import ( "crypto/hmac" "crypto/sha256" "encoding/base64" + "encoding/json" "encoding/xml" "errors" "fmt" "hash" "io" + "mime" "net/http" + "net/textproto" "net/url" "os" "path/filepath" @@ -385,6 +388,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { if r.Method == "HEAD" && !objectNameGiven { // HeadBucket if err == nil && fi.IsDir() { + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } w.WriteHeader(http.StatusOK) } else if os.IsNotExist(err) { s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound) @@ -394,6 +402,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { return true } if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects { + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } w.Header().Set("Content-Type", "application/x-directory") w.WriteHeader(http.StatusOK) return true @@ -415,6 +428,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { // shallow copy r, and change URL path r := *r r.URL.Path = fspath + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } http.FileServer(fs).ServeHTTP(w, &r) return true case r.Method == http.MethodPut: @@ -586,6 +604,60 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { } } +func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error { + maybeEncode := func(s string) string { + for _, c := range s { + if c > '\u007f' { + return mime.BEncoding.Encode("UTF-8", s) + } + } + return s + } + path = strings.TrimSuffix(path, "/") + var props map[string]interface{} + for { + fi, err := fs.Stat(path) + if err != nil { + return err + } + switch src := fi.Sys().(type) { + case *arvados.Collection: + props = src.Properties + case *arvados.Group: + props = src.Properties + default: + if err, ok := src.(error); ok { + return err + } + // Try parent + cut := strings.LastIndexByte(path, '/') + if cut < 0 { + return nil + } + path = path[:cut] + continue + } + break + } + for k, v := range props { + if !validMIMEHeaderKey(k) { + continue + } + k = "x-amz-meta-" + k + if s, ok := v.(string); ok { + header.Set(k, maybeEncode(s)) + } else if j, err := json.Marshal(v); err == nil { + header.Set(k, maybeEncode(string(j))) + } + } + return nil +} + +func validMIMEHeaderKey(k string) bool { + check := "z-" + k + return check != textproto.CanonicalMIMEHeaderKey(check) +} + // Call fn on the given path (directory) and its contents, in // lexicographic order. // diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index 261ebb5741..a99f3c278f 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -11,6 +11,7 @@ import ( "crypto/sha256" "fmt" "io/ioutil" + "mime" "net/http" "net/http/httptest" "net/url" @@ -39,12 +40,13 @@ type s3stage struct { kc *keepclient.KeepClient proj arvados.Group projbucket *s3.Bucket + subproj arvados.Group coll arvados.Collection collbucket *s3.Bucket } func (s *IntegrationSuite) s3setup(c *check.C) s3stage { - var proj arvados.Group + var proj, subproj arvados.Group var coll arvados.Collection arv := arvados.NewClientFromEnv() arv.AuthToken = arvadostest.ActiveToken @@ -52,14 +54,35 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { "group": map[string]interface{}{ "group_class": "project", "name": "keep-web s3 test", + "properties": map[string]interface{}{ + "project-properties-key": "project properties value", + }, }, "ensure_unique_name": true, }) c.Assert(err, check.IsNil) + err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{ + "group": map[string]interface{}{ + "owner_uuid": proj.UUID, + "group_class": "project", + "name": "keep-web s3 test subproject", + "properties": map[string]interface{}{ + "subproject_properties_key": "subproject properties value", + "invalid header key": "this value will not be returned because key contains spaces", + }, + }, + }) + c.Assert(err, check.IsNil) err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{ "owner_uuid": proj.UUID, "name": "keep-web s3 test collection", "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n", + "properties": map[string]interface{}{ + "string": "string value", + "array": []string{"element1", "element2"}, + "object": map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}}, + "nonascii": "⛵", + }, }}) c.Assert(err, check.IsNil) ac, err := arvadosclient.New(arv) @@ -95,7 +118,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { S3: client, Name: proj.UUID, }, - coll: coll, + subproj: subproj, + coll: coll, collbucket: &s3.Bucket{ S3: client, Name: coll.UUID, @@ -215,6 +239,73 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix c.Check(exists, check.Equals, true) } +func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) { + got := map[string]string{} + for hk, hv := range hdr { + if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 { + got[k] = hv[0] + } + } + c.Check(got, check.DeepEquals, expect) +} + +func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + expectCollectionTags := map[string]string{ + "String": "string value", + "Array": `["element1","element2"]`, + "Object": mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`), + "Nonascii": "=?UTF-8?b?4pu1?=", + } + expectSubprojectTags := map[string]string{ + "Subproject_properties_key": "subproject properties value", + } + expectProjectTags := map[string]string{ + "Project-Properties-Key": "project properties value", + } + + c.Log("HEAD object with metadata from collection") + resp, err := stage.collbucket.Head("sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("GET object with metadata from collection") + rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt") + c.Assert(err, check.IsNil) + content, err := ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + rdr.Close() + c.Check(content, check.HasLen, 4) + s.checkMetaEquals(c, hdr, expectCollectionTags) + + c.Log("HEAD bucket with metadata from collection") + resp, err = stage.collbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD directory placeholder with metadata from collection") + resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD file with metadata from collection") + resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("HEAD directory placeholder with metadata from subproject") + resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectSubprojectTags) + + c.Log("HEAD bucket with metadata from project") + resp, err = stage.projbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectProjectTags) +} + func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c)