From fdae7e47a030b2a731d39e548d4167bfd1ddc907 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Wed, 14 Sep 2022 14:03:48 -0400 Subject: [PATCH] 19362: Use hardlinks to a singleton for each project/collection. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- sdk/go/arvados/fs_deferred.go | 35 ------ sdk/go/arvados/fs_project.go | 60 +++++++-- sdk/go/arvados/fs_project_test.go | 35 +++--- sdk/go/arvados/fs_site.go | 202 ++++++++++++++++++++++++------ sdk/go/arvados/fs_site_test.go | 50 ++++++++ sdk/go/arvados/fs_users.go | 4 +- services/keep-web/s3_test.go | 13 +- 7 files changed, 295 insertions(+), 104 deletions(-) diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index 1dfa2df6e4..e85446098f 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -5,45 +5,10 @@ package arvados import ( - "log" "os" "sync" - "time" ) -func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode { - modTime := coll.ModifiedAt - if modTime.IsZero() { - modTime = time.Now() - } - placeholder := &treenode{ - fs: fs, - parent: parent, - inodes: nil, - fileinfo: fileinfo{ - name: coll.Name, - modTime: modTime, - mode: 0755 | os.ModeDir, - sys: func() interface{} { return &coll }, - }, - } - return &deferrednode{wrapped: placeholder, create: func() inode { - err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil) - if err != nil { - log.Printf("BUG: unhandled error: %s", err) - return placeholder - } - newfs, err := coll.FileSystem(fs, fs) - if err != nil { - log.Printf("BUG: unhandled error: %s", err) - return placeholder - } - cfs := newfs.(*collectionFileSystem) - cfs.SetParent(parent, coll.Name) - return cfs - }} -} - // A deferrednode wraps an inode that's expensive to build. Initially, // it responds to basic directory functions by proxying to the given // placeholder. If a caller uses a read/write/lock operation, diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go index faab6e4f04..88766129ae 100644 --- a/sdk/go/arvados/fs_project.go +++ b/sdk/go/arvados/fs_project.go @@ -6,7 +6,9 @@ package arvados import ( "log" + "os" "strings" + "time" ) func (fs *customFileSystem) defaultUUID(uuid string) (string, error) { @@ -64,9 +66,18 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in if strings.Contains(coll.UUID, "-j7d0g-") { // Group item was loaded into a Collection var -- but // we only need the Name and UUID anyway, so it's OK. - return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil + return &hardlink{ + inode: fs.projectSingleton(coll.UUID, &Group{ + UUID: coll.UUID, + Name: coll.Name, + ModifiedAt: coll.ModifiedAt, + Properties: coll.Properties, + }), + parent: parent, + name: coll.Name, + }, nil } else if strings.Contains(coll.UUID, "-4zz18-") { - return deferredCollectionFS(fs, parent, coll), nil + return fs.newDeferredCollectionDir(parent, name, coll.UUID, coll.ModifiedAt), nil } else { log.Printf("group contents: unrecognized UUID in response: %q", coll.UUID) return nil, ErrInvalidArgument @@ -105,10 +116,14 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, } for { - // The groups content endpoint returns Collection and Group (project) - // objects. This function only accesses the UUID and Name field. Both - // collections and groups have those fields, so it is easier to just treat - // the ObjectList that comes back as a CollectionList. + // The groups content endpoint returns + // Collection and Group (project) + // objects. This function only accesses the + // UUID, Name, and ModifiedAt fields. Both + // collections and groups have those fields, + // so it is easier to just treat the + // ObjectList that comes back as a + // CollectionList. var resp CollectionList err = fs.RequestAndDecode(&resp, "GET", "arvados/v1/groups/"+uuid+"/contents", nil, params) if err != nil { @@ -125,14 +140,14 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, continue } if strings.Contains(i.UUID, "-j7d0g-") { - inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{ + inodes = append(inodes, fs.newProjectDir(parent, i.Name, i.UUID, &Group{ UUID: i.UUID, Name: i.Name, ModifiedAt: i.ModifiedAt, Properties: i.Properties, })) } else if strings.Contains(i.UUID, "-4zz18-") { - inodes = append(inodes, deferredCollectionFS(fs, parent, i)) + inodes = append(inodes, fs.newDeferredCollectionDir(parent, i.Name, i.UUID, i.ModifiedAt)) } else { log.Printf("group contents: unrecognized UUID in response: %q", i.UUID) return nil, ErrInvalidArgument @@ -143,3 +158,32 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode, } return inodes, nil } + +func (fs *customFileSystem) newProjectDir(parent inode, name, uuid string, proj *Group) inode { + return &hardlink{inode: fs.projectSingleton(uuid, proj), parent: parent, name: name} +} + +func (fs *customFileSystem) newDeferredCollectionDir(parent inode, name, uuid string, modTime time.Time) inode { + if modTime.IsZero() { + modTime = time.Now() + } + placeholder := &treenode{ + fs: fs, + parent: parent, + inodes: nil, + fileinfo: fileinfo{ + name: name, + modTime: modTime, + mode: 0755 | os.ModeDir, + sys: func() interface{} { return &Collection{UUID: uuid, Name: name, ModifiedAt: modTime} }, + }, + } + return &deferrednode{wrapped: placeholder, create: func() inode { + node, err := fs.collectionSingleton(uuid) + if err != nil { + log.Printf("BUG: unhandled error: %s", err) + return placeholder + } + return &hardlink{inode: node, parent: parent, name: name} + }} +} diff --git a/sdk/go/arvados/fs_project_test.go b/sdk/go/arvados/fs_project_test.go index 8e7f588156..d3dac7a14f 100644 --- a/sdk/go/arvados/fs_project_test.go +++ b/sdk/go/arvados/fs_project_test.go @@ -10,7 +10,6 @@ import ( "errors" "io" "os" - "path/filepath" "strings" check "gopkg.in/check.v1" @@ -102,14 +101,16 @@ func (s *SiteFSSuite) TestFilterGroup(c *check.C) { func (s *SiteFSSuite) TestCurrentUserHome(c *check.C) { s.fs.MountProject("home", "") - s.testHomeProject(c, "/home") + s.testHomeProject(c, "/home", "home") } func (s *SiteFSSuite) TestUsersDir(c *check.C) { - s.testHomeProject(c, "/users/active") + // /users/active is a hardlink to a dir whose name is the UUID + // of the active user + s.testHomeProject(c, "/users/active", fixtureActiveUserUUID) } -func (s *SiteFSSuite) testHomeProject(c *check.C, path string) { +func (s *SiteFSSuite) testHomeProject(c *check.C, path, expectRealName string) { f, err := s.fs.Open(path) c.Assert(err, check.IsNil) fis, err := f.Readdir(-1) @@ -130,8 +131,7 @@ func (s *SiteFSSuite) testHomeProject(c *check.C, path string) { fi, err := f.Stat() c.Assert(err, check.IsNil) c.Check(fi.IsDir(), check.Equals, true) - _, basename := filepath.Split(path) - c.Check(fi.Name(), check.Equals, basename) + c.Check(fi.Name(), check.Equals, expectRealName) f, err = s.fs.Open(path + "/A Project/A Subproject") c.Assert(err, check.IsNil) @@ -263,14 +263,10 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) { err = project.Sync() c.Check(err, check.IsNil) - _, err = s.fs.Open("/home/A Project/oob/test.txt") - c.Check(err, check.IsNil) - - // Sync again to mark the project dir as stale, so the - // collection gets reloaded from the controller on next - // lookup. - err = project.Sync() - c.Check(err, check.IsNil) + f, err = s.fs.Open("/home/A Project/oob/test.txt") + if c.Check(err, check.IsNil) { + f.Close() + } // Ensure collection was flushed by Sync var latest Collection @@ -288,10 +284,17 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) { }) c.Assert(err, check.IsNil) + // Sync again to reload collection. + err = project.Sync() + c.Check(err, check.IsNil) + + // Check test.txt deletion is reflected in fs. _, err = s.fs.Open("/home/A Project/oob/test.txt") c.Check(err, check.NotNil) - _, err = s.fs.Open("/home/A Project/oob") - c.Check(err, check.IsNil) + f, err = s.fs.Open("/home/A Project/oob") + if c.Check(err, check.IsNil) { + f.Close() + } err = s.client.RequestAndDecode(nil, "DELETE", "arvados/v1/collections/"+oob.UUID, nil, nil) c.Assert(err, check.IsNil) diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 531d7968ab..716ef34e26 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -29,6 +29,10 @@ type customFileSystem struct { staleLock sync.Mutex forwardSlashNameSubstitution string + + byID map[string]inode + byIDLock sync.Mutex + byIDRoot *treenode } func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { @@ -51,6 +55,17 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { }, inodes: make(map[string]inode), } + fs.byID = map[string]inode{} + fs.byIDRoot = &treenode{ + fs: fs, + parent: root, + inodes: make(map[string]inode), + fileinfo: fileinfo{ + name: "_internal_by_id", + modTime: time.Now(), + mode: 0755 | os.ModeDir, + }, + } return fs } @@ -69,7 +84,7 @@ func (fs *customFileSystem) MountByID(mount string) { mode: 0755 | os.ModeDir, }, }, - create: fs.mountByID, + create: fs.newCollectionOrProjectHardlink, }, nil }) } @@ -78,7 +93,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) { fs.root.treenode.Lock() defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { - return fs.newProjectNode(fs.root, mount, uuid, nil), nil + return fs.newProjectDir(fs.root, mount, uuid, nil), nil }) } @@ -122,7 +137,7 @@ func (c *Client) SiteFileSystem(kc keepClient) CustomFileSystem { } func (fs *customFileSystem) Sync() error { - return fs.root.Sync() + return fs.byIDRoot.Sync() } // Stale returns true if information obtained at time t should be @@ -137,54 +152,59 @@ func (fs *customFileSystem) newNode(name string, perm os.FileMode, modTime time. return nil, ErrInvalidOperation } -func (fs *customFileSystem) mountByID(parent inode, id string) (inode, error) { +func (fs *customFileSystem) newCollectionOrProjectHardlink(parent inode, id string) (inode, error) { + fs.byIDLock.Lock() + n := fs.byID[id] + if n != nil { + // Avoid the extra remote API lookup if we already + // have a singleton for this ID. + fs.byIDLock.Unlock() + return &hardlink{inode: n, parent: parent, name: id}, nil + } + fs.byIDLock.Unlock() + if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) { - return fs.mountCollection(parent, id) - } else if strings.Contains(id, "-j7d0g-") { - return fs.newProjectNode(fs.root, id, id, nil), nil + node, err := fs.collectionSingleton(id) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + return &hardlink{inode: node, parent: parent, name: id}, nil + } else if strings.Contains(id, "-j7d0g-") || strings.Contains(id, "-tpzed-") { + proj, err := fs.getProject(id) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + node := fs.projectSingleton(id, proj) + return &hardlink{inode: node, parent: parent, name: id}, nil } else { return nil, nil } } -func (fs *customFileSystem) mountCollection(parent inode, id string) (inode, error) { - var coll Collection - err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, nil) - if statusErr, ok := err.(interface{ HTTPStatus() int }); ok && statusErr.HTTPStatus() == http.StatusNotFound { - return nil, nil - } else if err != nil { - return nil, err - } - if len(id) != 27 { - // This means id is a PDH, and controller/railsapi - // returned one of (possibly) many collections with - // that PDH. Even if controller returns more fields - // besides PDH and manifest text (which are equal for - // all matching collections), we don't want to expose - // them (e.g., through Sys()). - coll = Collection{ - PortableDataHash: coll.PortableDataHash, - ManifestText: coll.ManifestText, - } +func (fs *customFileSystem) projectSingleton(uuid string, proj *Group) inode { + fs.byIDLock.Lock() + defer fs.byIDLock.Unlock() + if n := fs.byID[uuid]; n != nil { + return n } - newfs, err := coll.FileSystem(fs, fs) - if err != nil { - return nil, err + name := uuid + if name == "" { + // special case uuid=="" implements the "home project" + // (owner_uuid == current user uuid) + name = "home" } - cfs := newfs.(*collectionFileSystem) - cfs.SetParent(parent, id) - return cfs, nil -} - -func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode { var projLoading sync.Mutex - return &lookupnode{ + n := &lookupnode{ stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, loadAll: func(parent inode) ([]inode, error) { return fs.projectsLoadAll(parent, uuid) }, treenode: treenode{ fs: fs, - parent: root, + parent: fs.byIDRoot, inodes: make(map[string]inode), fileinfo: fileinfo{ name: name, @@ -196,17 +216,81 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj * if proj != nil { return proj } - var g Group - err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil) + g, err := fs.getProject(uuid) if err != nil { return err } - proj = &g + proj = g return proj }, }, }, } + fs.byID[uuid] = n + return n +} + +func (fs *customFileSystem) getProject(uuid string) (*Group, error) { + var g Group + err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil) + if statusErr, ok := err.(interface{ HTTPStatus() int }); ok && statusErr.HTTPStatus() == http.StatusNotFound { + return nil, os.ErrNotExist + } else if err != nil { + return nil, err + } + return &g, err +} + +func (fs *customFileSystem) collectionSingleton(id string) (inode, error) { + fs.byIDLock.Lock() + if n := fs.byID[id]; n != nil { + fs.byIDLock.Unlock() + return n, nil + } + fs.byIDLock.Unlock() + + coll, err := fs.getCollection(id) + if err != nil { + return nil, err + } + newfs, err := coll.FileSystem(fs, fs) + if err != nil { + return nil, err + } + cfs := newfs.(*collectionFileSystem) + cfs.SetParent(fs.byIDRoot, id) + + fs.byIDLock.Lock() + defer fs.byIDLock.Unlock() + if n := fs.byID[id]; n != nil { + return n, nil + } + fs.byID[id] = cfs + fs.byIDRoot.Child(id, func(inode) (inode, error) { return cfs, nil }) + return cfs, nil +} + +func (fs *customFileSystem) getCollection(id string) (*Collection, error) { + var coll Collection + err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, nil) + if statusErr, ok := err.(interface{ HTTPStatus() int }); ok && statusErr.HTTPStatus() == http.StatusNotFound { + return nil, os.ErrNotExist + } else if err != nil { + return nil, err + } + if len(id) != 27 { + // This means id is a PDH, and controller/railsapi + // returned one of (possibly) many collections with + // that PDH. Even if controller returns more fields + // besides PDH and manifest text (which are equal for + // all matching collections), we don't want to expose + // them (e.g., through Sys()). + coll = Collection{ + PortableDataHash: coll.PortableDataHash, + ManifestText: coll.ManifestText, + } + } + return &coll, nil } // vdirnode wraps an inode by rejecting (with ErrInvalidOperation) @@ -244,3 +328,41 @@ func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inod } }) } + +// A hardlink can be used to mount an existing node at an additional +// point in the same filesystem. +type hardlink struct { + inode + parent inode + name string +} + +func (hl *hardlink) Sync() error { + if node, ok := hl.inode.(syncer); ok { + return node.Sync() + } else { + return ErrInvalidOperation + } +} + +func (hl *hardlink) SetParent(parent inode, name string) { + hl.Lock() + defer hl.Unlock() + hl.parent = parent + hl.name = name +} + +func (hl *hardlink) Parent() inode { + hl.RLock() + defer hl.RUnlock() + return hl.parent +} + +func (hl *hardlink) FileInfo() os.FileInfo { + fi := hl.inode.FileInfo() + if fi, ok := fi.(fileinfo); ok { + fi.name = hl.name + return fi + } + return fi +} diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go index 3abe2b457f..c7d6b2a464 100644 --- a/sdk/go/arvados/fs_site_test.go +++ b/sdk/go/arvados/fs_site_test.go @@ -22,6 +22,7 @@ const ( // Importing arvadostest would be an import cycle, so these // fixtures are duplicated here [until fs moves to a separate // package]. + fixtureActiveUserUUID = "zzzzz-tpzed-xurymjxw79nv3jz" fixtureActiveToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi" fixtureAProjectUUID = "zzzzz-j7d0g-v955i6s2oi1cbso" fixtureThisFilterGroupUUID = "zzzzz-j7d0g-thisfiltergroup" @@ -97,6 +98,55 @@ func (s *SiteFSSuite) TestUpdateStorageClasses(c *check.C) { c.Assert(err, check.ErrorMatches, `.*stub does not write storage class "archive"`) } +func (s *SiteFSSuite) TestSameCollectionDifferentPaths(c *check.C) { + s.fs.MountProject("home", "") + var coll Collection + err := s.client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "collection": map[string]interface{}{ + "owner_uuid": fixtureAProjectUUID, + "name": fmt.Sprintf("test collection %d", time.Now().UnixNano()), + }, + }) + c.Assert(err, check.IsNil) + + viaProjID := "by_id/" + fixtureAProjectUUID + "/" + coll.Name + viaProjName := "home/A Project/" + coll.Name + viaCollID := "by_id/" + coll.UUID + for n, dirs := range [][]string{ + {viaCollID, viaProjID, viaProjName}, + {viaCollID, viaProjName, viaProjID}, + {viaProjID, viaProjName, viaCollID}, + {viaProjID, viaCollID, viaProjName}, + {viaProjName, viaCollID, viaProjID}, + {viaProjName, viaProjID, viaCollID}, + } { + filename := fmt.Sprintf("file %d", n) + f := make([]File, 3) + for i, dir := range dirs { + path := dir + "/" + filename + mode := os.O_RDWR + if i == 0 { + mode |= os.O_CREATE + c.Logf("create %s", path) + } else { + c.Logf("open %s", path) + } + f[i], err = s.fs.OpenFile(path, mode, 0777) + c.Assert(err, check.IsNil, check.Commentf("n=%d i=%d path=%s", n, i, path)) + defer f[i].Close() + } + _, err = io.WriteString(f[0], filename) + c.Assert(err, check.IsNil) + _, err = f[1].Seek(0, io.SeekEnd) + c.Assert(err, check.IsNil) + _, err = io.WriteString(f[1], filename) + c.Assert(err, check.IsNil) + buf, err := io.ReadAll(f[2]) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Equals, filename+filename) + } +} + func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) { f, err := s.fs.Open("/by_id") c.Assert(err, check.IsNil) diff --git a/sdk/go/arvados/fs_users.go b/sdk/go/arvados/fs_users.go index ae47414b7a..5f9edb40fd 100644 --- a/sdk/go/arvados/fs_users.go +++ b/sdk/go/arvados/fs_users.go @@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro return nil, os.ErrNotExist } user := resp.Items[0] - return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil + return fs.newProjectDir(parent, user.Username, user.UUID, nil), nil } func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { @@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) { if user.Username == "" { continue } - inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil)) + inodes = append(inodes, fs.newProjectDir(parent, user.Username, user.UUID, nil)) } params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}} } diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index 851bee4b72..476ec9437f 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -316,14 +316,14 @@ func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) - s.testS3PutObjectSuccess(c, stage.collbucket, "") + s.testS3PutObjectSuccess(c, stage.collbucket, "", stage.coll.UUID) } func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) { stage := s.s3setup(c) defer stage.teardown(c) - s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/") + s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/", stage.coll.UUID) } -func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) { +func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string, collUUID string) { for _, trial := range []struct { path string size int @@ -390,6 +390,13 @@ func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, c.Check(err, check.IsNil) c.Check(buf2, check.HasLen, len(buf)) c.Check(bytes.Equal(buf, buf2), check.Equals, true) + + // Check that the change is immediately visible via + // (non-S3) webdav request. + _, resp := s.do("GET", "http://"+collUUID+".keep-web.example/"+trial.path, "", http.Header{ + "Authorization": {"Bearer " + arvadostest.ActiveToken}, + }) + c.Check(resp.Code, check.Equals, http.StatusOK) } } -- 2.30.2