From: Tom Clegg Date: Mon, 27 Jun 2022 15:25:31 +0000 (-0400) Subject: 19088: Export collection/project properties as x-amz-meta tags. X-Git-Tag: 2.5.0~126^2~3 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/c502c5a50aae825683ee4cff629c6839a4209501 19088: Export collection/project properties as x-amz-meta tags. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index ce9253ab3d..0cde825b38 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -234,6 +234,15 @@ type fileinfo struct { mode os.FileMode size int64 modTime time.Time + // Source data structure: *Collection, *Group, or + // nil. Currently populated only for project dirs and + // top-level collection dirs; *not* populated for + // /by_id/{uuid} dirs (only subdirs below that). Does not stay + // up to date with upstream changes. + // + // Intended to support keep-web's properties-as-s3-metadata + // feature (https://dev.arvados.org/issues/19088). + sys interface{} } // Name implements os.FileInfo. @@ -261,9 +270,9 @@ func (fi fileinfo) Size() int64 { return fi.size } -// Sys implements os.FileInfo. +// Sys implements os.FileInfo. See comment in fileinfo struct. func (fi fileinfo) Sys() interface{} { - return nil + return fi.sys } type nullnode struct{} diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index ccfbdc4da2..d3af92f9e1 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile name: ".", mode: os.ModeDir | 0755, modTime: modTime, + sys: c, }, inodes: make(map[string]inode), }, diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index 66a126a39c..07cf760034 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode { name: coll.Name, modTime: modTime, mode: 0755 | os.ModeDir, + sys: &coll, }, } return &deferrednode{wrapped: placeholder, create: func() inode { diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go index 380fb9c6d5..4db87a591e 100644 --- a/sdk/go/arvados/fs_project.go +++ b/sdk/go/arvados/fs_project.go @@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in {"uuid", "is_a", []string{"arvados#collection", "arvados#group"}}, {"groups.group_class", "=", "project"}, }, + Select: []string{"uuid", "name", "modified_at", "properties"}, }) if err != nil { return nil, err @@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in if strings.Contains(coll.UUID, "-j7d0g-") { // Group item was loaded into a Collection var -- but // we only need the Name and UUID anyway, so it's OK. - return fs.newProjectNode(parent, coll.Name, coll.UUID), nil + return fs.newProjectNode(parent, coll.Name, coll.UUID, coll.Properties), nil } else if strings.Contains(coll.UUID, "-4zz18-") { return deferredCollectionFS(fs, parent, coll), nil } else { @@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, Count: "none", Filters: filters, Order: "uuid", + Select: []string{"uuid", "name", "modified_at", "properties"}, } for { @@ -121,7 +123,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, continue } if strings.Contains(i.UUID, "-j7d0g-") { - inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID)) + inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, i.Properties)) } else if strings.Contains(i.UUID, "-4zz18-") { inodes = append(inodes, deferredCollectionFS(fs, parent, i)) } else { diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 3892be1e9a..0a561f667a 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) { fs.root.treenode.Lock() defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { - return fs.newProjectNode(fs.root, mount, uuid), nil + return fs.newProjectNode(fs.root, mount, uuid, nil), nil }) } @@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode { if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) { return fs.mountCollection(parent, id) } else if strings.Contains(id, "-j7d0g-") { - return fs.newProjectNode(fs.root, id, id) + return fs.newProjectNode(fs.root, id, id, nil) } else { return nil } @@ -161,7 +161,7 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode { return cfs } -func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode { +func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, properties map[string]interface{}) inode { return &lookupnode{ stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, @@ -174,6 +174,11 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode name: name, modTime: time.Now(), mode: 0755 | os.ModeDir, + sys: &Group{ + GroupClass: "project", + UUID: uuid, + Properties: properties, + }, }, }, } diff --git a/sdk/go/arvados/fs_users.go b/sdk/go/arvados/fs_users.go index 00f7036969..ae47414b7a 100644 --- a/sdk/go/arvados/fs_users.go +++ b/sdk/go/arvados/fs_users.go @@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro return nil, os.ErrNotExist } user := resp.Items[0] - return fs.newProjectNode(parent, user.Username, user.UUID), nil + return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil } func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { @@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { if user.Username == "" { continue } - inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID)) + inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil)) } params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}} } diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go index 59ab3cd438..4117dafbc6 100644 --- a/services/keep-web/s3.go +++ b/services/keep-web/s3.go @@ -8,12 +8,14 @@ import ( "crypto/hmac" "crypto/sha256" "encoding/base64" + "encoding/json" "encoding/xml" "errors" "fmt" "hash" "io" "net/http" + "net/textproto" "net/url" "os" "path/filepath" @@ -385,6 +387,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { if r.Method == "HEAD" && !objectNameGiven { // HeadBucket if err == nil && fi.IsDir() { + setFileInfoHeaders(w.Header(), fs, fspath) w.WriteHeader(http.StatusOK) } else if os.IsNotExist(err) { s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound) @@ -394,6 +397,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { return true } if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects { + setFileInfoHeaders(w.Header(), fs, fspath) w.Header().Set("Content-Type", "application/x-directory") w.WriteHeader(http.StatusOK) return true @@ -415,6 +419,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { // shallow copy r, and change URL path r := *r r.URL.Path = fspath + setFileInfoHeaders(w.Header(), fs, fspath) http.FileServer(fs).ServeHTTP(w, &r) return true case r.Method == http.MethodPut: @@ -586,6 +591,48 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { } } +func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) { + path = strings.TrimSuffix(path, "/") + var props map[string]interface{} + for { + fi, err := fs.Stat(path) + if err != nil { + return + } + switch src := fi.Sys().(type) { + case *arvados.Collection: + props = src.Properties + case *arvados.Group: + props = src.Properties + default: + // Try parent + cut := strings.LastIndexByte(path, '/') + if cut < 0 { + return + } + path = path[:cut] + continue + } + break + } + for k, v := range props { + if !validMIMEHeaderKey(k) { + continue + } + k = "x-amz-meta-" + k + if s, ok := v.(string); ok { + header.Set(k, s) + } else if j, err := json.Marshal(v); err == nil { + header.Set(k, string(j)) + } + } +} + +func validMIMEHeaderKey(k string) bool { + check := "z-" + k + return check != textproto.CanonicalMIMEHeaderKey(check) +} + // Call fn on the given path (directory) and its contents, in // lexicographic order. // diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index 261ebb5741..a2e61e9b78 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -39,12 +39,13 @@ type s3stage struct { kc *keepclient.KeepClient proj arvados.Group projbucket *s3.Bucket + subproj arvados.Group coll arvados.Collection collbucket *s3.Bucket } func (s *IntegrationSuite) s3setup(c *check.C) s3stage { - var proj arvados.Group + var proj, subproj arvados.Group var coll arvados.Collection arv := arvados.NewClientFromEnv() arv.AuthToken = arvadostest.ActiveToken @@ -56,10 +57,27 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { "ensure_unique_name": true, }) c.Assert(err, check.IsNil) + err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{ + "group": map[string]interface{}{ + "owner_uuid": proj.UUID, + "group_class": "project", + "name": "keep-web s3 test subproject", + "properties": map[string]interface{}{ + "subproject_properties_key": "subproject properties value", + "invalid header key": "this value will not be returned because key contains spaces", + }, + }, + }) + c.Assert(err, check.IsNil) err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{ "owner_uuid": proj.UUID, "name": "keep-web s3 test collection", "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n", + "properties": map[string]interface{}{ + "string": "string value", + "array": []string{"element1", "element2"}, + "object": map[string]interface{}{"key": map[string]interface{}{"key2": "value"}}, + }, }}) c.Assert(err, check.IsNil) ac, err := arvadosclient.New(arv) @@ -95,7 +113,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { S3: client, Name: proj.UUID, }, - coll: coll, + subproj: subproj, + coll: coll, collbucket: &s3.Bucket{ S3: client, Name: coll.UUID, @@ -215,6 +234,46 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix c.Check(exists, check.Equals, true) } +func (s *IntegrationSuite) checkMetaEquals(c *check.C, resp *http.Response, expect map[string]string) { + got := map[string]string{} + for hk, hv := range resp.Header { + if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 { + got[k] = hv[0] + } + } + c.Check(got, check.DeepEquals, expect) +} + +func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + expectCollectionTags := map[string]string{ + "String": "string value", + "Array": `["element1","element2"]`, + "Object": `{"key":{"key2":"value"}}`, + } + expectSubprojectTags := map[string]string{ + "Subproject_properties_key": "subproject properties value", + } + + resp, err := stage.collbucket.Head("sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp, expectCollectionTags) + + resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp, expectCollectionTags) + + resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp, expectCollectionTags) + + resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp, expectSubprojectTags) +} + func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c)