From b041a675c577e174680913e0da0bf69b1cca83b6 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 27 Jun 2022 14:19:45 -0400 Subject: [PATCH] 19088: Export collection/project properties as bucket-level tags. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- sdk/go/arvados/fs_base.go | 16 +++++++------ sdk/go/arvados/fs_collection.go | 2 +- sdk/go/arvados/fs_deferred.go | 2 +- sdk/go/arvados/fs_project.go | 9 ++++++-- sdk/go/arvados/fs_site.go | 20 ++++++++++++---- services/keep-web/s3.go | 28 +++++++++++++++++----- services/keep-web/s3_test.go | 41 ++++++++++++++++++++++++++++----- 7 files changed, 90 insertions(+), 28 deletions(-) diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index 0cde825b38..2ad4d1f859 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -234,15 +234,14 @@ type fileinfo struct { mode os.FileMode size int64 modTime time.Time - // Source data structure: *Collection, *Group, or - // nil. Currently populated only for project dirs and - // top-level collection dirs; *not* populated for - // /by_id/{uuid} dirs (only subdirs below that). Does not stay - // up to date with upstream changes. + // If not nil, sys() returns the source data structure, which + // can be a *Collection, *Group, or nil. Currently populated + // only for project dirs and top-level collection dirs. Does + // not stay up to date with upstream changes. // // Intended to support keep-web's properties-as-s3-metadata // feature (https://dev.arvados.org/issues/19088). - sys interface{} + sys func() interface{} } // Name implements os.FileInfo. @@ -272,7 +271,10 @@ func (fi fileinfo) Size() int64 { // Sys implements os.FileInfo. See comment in fileinfo struct. func (fi fileinfo) Sys() interface{} { - return fi.sys + if fi.sys == nil { + return nil + } + return fi.sys() } type nullnode struct{} diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index d3af92f9e1..26012e2406 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -85,7 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile name: ".", mode: os.ModeDir | 0755, modTime: modTime, - sys: c, + sys: func() interface{} { return c }, }, inodes: make(map[string]inode), }, diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index 07cf760034..1dfa2df6e4 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -24,7 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode { name: coll.Name, modTime: modTime, mode: 0755 | os.ModeDir, - sys: &coll, + sys: func() interface{} { return &coll }, }, } return &deferrednode{wrapped: placeholder, create: func() inode { diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go index 4db87a591e..bea1f76e24 100644 --- a/sdk/go/arvados/fs_project.go +++ b/sdk/go/arvados/fs_project.go @@ -64,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in if strings.Contains(coll.UUID, "-j7d0g-") { // Group item was loaded into a Collection var -- but // we only need the Name and UUID anyway, so it's OK. - return fs.newProjectNode(parent, coll.Name, coll.UUID, coll.Properties), nil + return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil } else if strings.Contains(coll.UUID, "-4zz18-") { return deferredCollectionFS(fs, parent, coll), nil } else { @@ -123,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, continue } if strings.Contains(i.UUID, "-j7d0g-") { - inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, i.Properties)) + inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{ + UUID: i.UUID, + Name: i.Name, + ModifiedAt: i.ModifiedAt, + Properties: i.Properties, + })) } else if strings.Contains(i.UUID, "-4zz18-") { inodes = append(inodes, deferredCollectionFS(fs, parent, i)) } else { diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 0a561f667a..bb2eee7792 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode { return cfs } -func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, properties map[string]interface{}) inode { +func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode { + var projLoading sync.Mutex return &lookupnode{ stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, @@ -174,10 +175,19 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proper name: name, modTime: time.Now(), mode: 0755 | os.ModeDir, - sys: &Group{ - GroupClass: "project", - UUID: uuid, - Properties: properties, + sys: func() interface{} { + projLoading.Lock() + defer projLoading.Unlock() + if proj != nil { + return proj + } + var g Group + err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil) + if err != nil { + return err + } + proj = &g + return proj }, }, }, diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go index 4117dafbc6..d92828e066 100644 --- a/services/keep-web/s3.go +++ b/services/keep-web/s3.go @@ -387,7 +387,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { if r.Method == "HEAD" && !objectNameGiven { // HeadBucket if err == nil && fi.IsDir() { - setFileInfoHeaders(w.Header(), fs, fspath) + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } w.WriteHeader(http.StatusOK) } else if os.IsNotExist(err) { s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound) @@ -397,7 +401,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { return true } if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects { - setFileInfoHeaders(w.Header(), fs, fspath) + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } w.Header().Set("Content-Type", "application/x-directory") w.WriteHeader(http.StatusOK) return true @@ -419,7 +427,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { // shallow copy r, and change URL path r := *r r.URL.Path = fspath - setFileInfoHeaders(w.Header(), fs, fspath) + err = setFileInfoHeaders(w.Header(), fs, fspath) + if err != nil { + s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway) + return true + } http.FileServer(fs).ServeHTTP(w, &r) return true case r.Method == http.MethodPut: @@ -591,13 +603,13 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { } } -func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) { +func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error { path = strings.TrimSuffix(path, "/") var props map[string]interface{} for { fi, err := fs.Stat(path) if err != nil { - return + return err } switch src := fi.Sys().(type) { case *arvados.Collection: @@ -605,10 +617,13 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st case *arvados.Group: props = src.Properties default: + if err, ok := src.(error); ok { + return err + } // Try parent cut := strings.LastIndexByte(path, '/') if cut < 0 { - return + return nil } path = path[:cut] continue @@ -626,6 +641,7 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st header.Set(k, string(j)) } } + return nil } func validMIMEHeaderKey(k string) bool { diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index a2e61e9b78..b25ef972dc 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -53,6 +53,9 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage { "group": map[string]interface{}{ "group_class": "project", "name": "keep-web s3 test", + "properties": map[string]interface{}{ + "project-properties-key": "project properties value", + }, }, "ensure_unique_name": true, }) @@ -234,9 +237,9 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix c.Check(exists, check.Equals, true) } -func (s *IntegrationSuite) checkMetaEquals(c *check.C, resp *http.Response, expect map[string]string) { +func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) { got := map[string]string{} - for hk, hv := range resp.Header { + for hk, hv := range hdr { if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 { got[k] = hv[0] } @@ -256,22 +259,48 @@ func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { expectSubprojectTags := map[string]string{ "Subproject_properties_key": "subproject properties value", } + expectProjectTags := map[string]string{ + "Project-Properties-Key": "project properties value", + } + c.Log("HEAD object with metadata from collection") resp, err := stage.collbucket.Head("sailboat.txt", nil) c.Assert(err, check.IsNil) - s.checkMetaEquals(c, resp, expectCollectionTags) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + + c.Log("GET object with metadata from collection") + rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt") + c.Assert(err, check.IsNil) + content, err := ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + rdr.Close() + c.Check(content, check.HasLen, 4) + s.checkMetaEquals(c, hdr, expectCollectionTags) + + c.Log("HEAD bucket with metadata from collection") + resp, err = stage.collbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + c.Log("HEAD directory placeholder with metadata from collection") resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil) c.Assert(err, check.IsNil) - s.checkMetaEquals(c, resp, expectCollectionTags) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + c.Log("HEAD file with metadata from collection") resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil) c.Assert(err, check.IsNil) - s.checkMetaEquals(c, resp, expectCollectionTags) + s.checkMetaEquals(c, resp.Header, expectCollectionTags) + c.Log("HEAD directory placeholder with metadata from subproject") resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil) c.Assert(err, check.IsNil) - s.checkMetaEquals(c, resp, expectSubprojectTags) + s.checkMetaEquals(c, resp.Header, expectSubprojectTags) + + c.Log("HEAD bucket with metadata from project") + resp, err = stage.projbucket.Head("/", nil) + c.Assert(err, check.IsNil) + s.checkMetaEquals(c, resp.Header, expectProjectTags) } func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { -- 2.30.2