19088: Export collection/project properties as bucket-level tags.
authorTom Clegg <tom@curii.com>
Mon, 27 Jun 2022 18:19:45 +0000 (14:19 -0400)
committerTom Clegg <tom@curii.com>
Mon, 27 Jun 2022 18:19:45 +0000 (14:19 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_site.go
services/keep-web/s3.go
services/keep-web/s3_test.go

index 0cde825b38834fffa945809c4609184b77923785..2ad4d1f859f1141035c04cb4180c5ef623d1fa04 100644 (file)
@@ -234,15 +234,14 @@ type fileinfo struct {
        mode    os.FileMode
        size    int64
        modTime time.Time
-       // Source data structure: *Collection, *Group, or
-       // nil. Currently populated only for project dirs and
-       // top-level collection dirs; *not* populated for
-       // /by_id/{uuid} dirs (only subdirs below that). Does not stay
-       // up to date with upstream changes.
+       // If not nil, sys() returns the source data structure, which
+       // can be a *Collection, *Group, or nil. Currently populated
+       // only for project dirs and top-level collection dirs. Does
+       // not stay up to date with upstream changes.
        //
        // Intended to support keep-web's properties-as-s3-metadata
        // feature (https://dev.arvados.org/issues/19088).
-       sys interface{}
+       sys func() interface{}
 }
 
 // Name implements os.FileInfo.
@@ -272,7 +271,10 @@ func (fi fileinfo) Size() int64 {
 
 // Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-       return fi.sys
+       if fi.sys == nil {
+               return nil
+       }
+       return fi.sys()
 }
 
 type nullnode struct{}
index d3af92f9e1649957050f4f62fffae6a4c36e31a9..26012e240603d0be43a1019346c4e946e2821790 100644 (file)
@@ -85,7 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
                                name:    ".",
                                mode:    os.ModeDir | 0755,
                                modTime: modTime,
-                               sys:     c,
+                               sys:     func() interface{} { return c },
                        },
                        inodes: make(map[string]inode),
                },
index 07cf760034d050814a211b074e1ae3fa51b74c59..1dfa2df6e4005f0b6d93f497a657e81e583bad14 100644 (file)
@@ -24,7 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        name:    coll.Name,
                        modTime: modTime,
                        mode:    0755 | os.ModeDir,
-                       sys:     &coll,
+                       sys:     func() interface{} { return &coll },
                },
        }
        return &deferrednode{wrapped: placeholder, create: func() inode {
index 4db87a591eebabad096bb988204f6c90e9ad7ef0..bea1f76e24f24faffa38fbccd7b6b880ffb2d22e 100644 (file)
@@ -64,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
        if strings.Contains(coll.UUID, "-j7d0g-") {
                // Group item was loaded into a Collection var -- but
                // we only need the Name and UUID anyway, so it's OK.
-               return fs.newProjectNode(parent, coll.Name, coll.UUID, coll.Properties), nil
+               return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil
        } else if strings.Contains(coll.UUID, "-4zz18-") {
                return deferredCollectionFS(fs, parent, coll), nil
        } else {
@@ -123,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                                        continue
                                }
                                if strings.Contains(i.UUID, "-j7d0g-") {
-                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, i.Properties))
+                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{
+                                               UUID:       i.UUID,
+                                               Name:       i.Name,
+                                               ModifiedAt: i.ModifiedAt,
+                                               Properties: i.Properties,
+                                       }))
                                } else if strings.Contains(i.UUID, "-4zz18-") {
                                        inodes = append(inodes, deferredCollectionFS(fs, parent, i))
                                } else {
index 0a561f667a0b4a6e12f8692d1a16ba6dff0b37f1..bb2eee77925fd2c682c7d42e1e8e175c4f2f1489 100644 (file)
@@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, properties map[string]interface{}) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode {
+       var projLoading sync.Mutex
        return &lookupnode{
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,10 +175,19 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proper
                                name:    name,
                                modTime: time.Now(),
                                mode:    0755 | os.ModeDir,
-                               sys: &Group{
-                                       GroupClass: "project",
-                                       UUID:       uuid,
-                                       Properties: properties,
+                               sys: func() interface{} {
+                                       projLoading.Lock()
+                                       defer projLoading.Unlock()
+                                       if proj != nil {
+                                               return proj
+                                       }
+                                       var g Group
+                                       err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+                                       if err != nil {
+                                               return err
+                                       }
+                                       proj = &g
+                                       return proj
                                },
                        },
                },
index 4117dafbc6f248e14cafa41cbfdd0193d1408fc9..d92828e066eeb9b61c9b25e38be543989ca540a1 100644 (file)
@@ -387,7 +387,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                        if err == nil && fi.IsDir() {
-                               setFileInfoHeaders(w.Header(), fs, fspath)
+                               err = setFileInfoHeaders(w.Header(), fs, fspath)
+                               if err != nil {
+                                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                                       return true
+                               }
                                w.WriteHeader(http.StatusOK)
                        } else if os.IsNotExist(err) {
                                s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -397,7 +401,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
-                       setFileInfoHeaders(w.Header(), fs, fspath)
+                       err = setFileInfoHeaders(w.Header(), fs, fspath)
+                       if err != nil {
+                               s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                               return true
+                       }
                        w.Header().Set("Content-Type", "application/x-directory")
                        w.WriteHeader(http.StatusOK)
                        return true
@@ -419,7 +427,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                // shallow copy r, and change URL path
                r := *r
                r.URL.Path = fspath
-               setFileInfoHeaders(w.Header(), fs, fspath)
+               err = setFileInfoHeaders(w.Header(), fs, fspath)
+               if err != nil {
+                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                       return true
+               }
                http.FileServer(fs).ServeHTTP(w, &r)
                return true
        case r.Method == http.MethodPut:
@@ -591,13 +603,13 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        }
 }
 
-func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) {
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
        path = strings.TrimSuffix(path, "/")
        var props map[string]interface{}
        for {
                fi, err := fs.Stat(path)
                if err != nil {
-                       return
+                       return err
                }
                switch src := fi.Sys().(type) {
                case *arvados.Collection:
@@ -605,10 +617,13 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st
                case *arvados.Group:
                        props = src.Properties
                default:
+                       if err, ok := src.(error); ok {
+                               return err
+                       }
                        // Try parent
                        cut := strings.LastIndexByte(path, '/')
                        if cut < 0 {
-                               return
+                               return nil
                        }
                        path = path[:cut]
                        continue
@@ -626,6 +641,7 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st
                        header.Set(k, string(j))
                }
        }
+       return nil
 }
 
 func validMIMEHeaderKey(k string) bool {
index a2e61e9b7806679b3d8dec9b0f2d1dcba76fb4e3..b25ef972dc1805a898da87fa2e2aa8512bcfb6a7 100644 (file)
@@ -53,6 +53,9 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                "group": map[string]interface{}{
                        "group_class": "project",
                        "name":        "keep-web s3 test",
+                       "properties": map[string]interface{}{
+                               "project-properties-key": "project properties value",
+                       },
                },
                "ensure_unique_name": true,
        })
@@ -234,9 +237,9 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
        c.Check(exists, check.Equals, true)
 }
 
-func (s *IntegrationSuite) checkMetaEquals(c *check.C, resp *http.Response, expect map[string]string) {
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
        got := map[string]string{}
-       for hk, hv := range resp.Header {
+       for hk, hv := range hdr {
                if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
                        got[k] = hv[0]
                }
@@ -256,22 +259,48 @@ func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
        expectSubprojectTags := map[string]string{
                "Subproject_properties_key": "subproject properties value",
        }
+       expectProjectTags := map[string]string{
+               "Project-Properties-Key": "project properties value",
+       }
 
+       c.Log("HEAD object with metadata from collection")
        resp, err := stage.collbucket.Head("sailboat.txt", nil)
        c.Assert(err, check.IsNil)
-       s.checkMetaEquals(c, resp, expectCollectionTags)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("GET object with metadata from collection")
+       rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+       c.Assert(err, check.IsNil)
+       content, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       rdr.Close()
+       c.Check(content, check.HasLen, 4)
+       s.checkMetaEquals(c, hdr, expectCollectionTags)
+
+       c.Log("HEAD bucket with metadata from collection")
+       resp, err = stage.collbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+       c.Log("HEAD directory placeholder with metadata from collection")
        resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
        c.Assert(err, check.IsNil)
-       s.checkMetaEquals(c, resp, expectCollectionTags)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+       c.Log("HEAD file with metadata from collection")
        resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
        c.Assert(err, check.IsNil)
-       s.checkMetaEquals(c, resp, expectCollectionTags)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+       c.Log("HEAD directory placeholder with metadata from subproject")
        resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
        c.Assert(err, check.IsNil)
-       s.checkMetaEquals(c, resp, expectSubprojectTags)
+       s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+       c.Log("HEAD bucket with metadata from project")
+       resp, err = stage.projbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectProjectTags)
 }
 
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {