From 846e3037de341d73e593a670b0d0e77bc3e893c1 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 8 Feb 2022 16:28:20 -0500 Subject: [PATCH] 18600: Add Snapshot and Splice methods. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/mount/fs.go | 23 ++--- sdk/go/arvados/fs_base.go | 58 +++++++++++++ sdk/go/arvados/fs_collection.go | 101 ++++++++++++++++++++++ sdk/go/arvados/fs_deferred.go | 2 + sdk/go/arvados/fs_filehandle.go | 15 ++++ sdk/go/arvados/fs_site_test.go | 143 ++++++++++++++++++++++++++++++++ 6 files changed, 332 insertions(+), 10 deletions(-) diff --git a/lib/mount/fs.go b/lib/mount/fs.go index c008b96af6..3c2e628d01 100644 --- a/lib/mount/fs.go +++ b/lib/mount/fs.go @@ -5,6 +5,7 @@ package mount import ( + "errors" "io" "log" "os" @@ -121,23 +122,25 @@ func (fs *keepFS) Utimens(path string, tmsp []fuse.Timespec) int { } func (fs *keepFS) errCode(err error) int { - if os.IsNotExist(err) { + if err == nil { + return 0 + } + if errors.Is(err, os.ErrNotExist) { return -fuse.ENOENT } - switch err { - case os.ErrExist: + if errors.Is(err, os.ErrExist) { return -fuse.EEXIST - case arvados.ErrInvalidArgument: + } + if errors.Is(err, arvados.ErrInvalidArgument) { return -fuse.EINVAL - case arvados.ErrInvalidOperation: + } + if errors.Is(err, arvados.ErrInvalidOperation) { return -fuse.ENOSYS - case arvados.ErrDirectoryNotEmpty: + } + if errors.Is(err, arvados.ErrDirectoryNotEmpty) { return -fuse.ENOTEMPTY - case nil: - return 0 - default: - return -fuse.EIO } + return -fuse.EIO } func (fs *keepFS) Mkdir(path string, mode uint32) int { diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index 5f2747ac9a..680a8431c5 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -77,6 +77,21 @@ type File interface { Stat() (os.FileInfo, error) Truncate(int64) error Sync() error + // Create a snapshot of a file or directory tree, which can + // then be spliced onto a different path or a different + // collection. 
+ Snapshot() (*Subtree, error) + // Replace this file or directory with the given snapshot. It + // is an error to replace a directory with a file. If snapshot + // is (or might be) a file, first remove the directory, create + // a file with the same name, and splice onto that file. + Splice(snapshot *Subtree) error +} + +// A Subtree is a detached part of a filesystem tree that can be +// spliced into a filesystem via (File)Splice(). +type Subtree struct { + inode inode } // A FileSystem is an http.Filesystem plus Stat() and support for @@ -152,6 +167,12 @@ type inode interface { Readdir() ([]os.FileInfo, error) Size() int64 FileInfo() os.FileInfo + // Create a snapshot of this node and its descendants. + Snapshot() (inode, error) + // Replace this node with a copy of the provided snapshot. + // Caller may provide the same snapshot to multiple Splice + // calls, but must not modify the snapshot concurrently. + Splice(inode) error // Child() performs lookups and updates of named child nodes. // @@ -270,6 +291,14 @@ func (*nullnode) MemorySize() int64 { return 64 } +func (*nullnode) Snapshot() (inode, error) { + return nil, ErrInvalidOperation +} + +func (*nullnode) Splice(inode) error { + return ErrInvalidOperation +} + type treenode struct { fs FileSystem parent inode @@ -697,3 +726,32 @@ func rlookup(start inode, path string) (node inode, err error) { func permittedName(name string) bool { return name != "" && name != "." && name != ".." && !strings.Contains(name, "/") } + +// Snapshot returns a Subtree that's a copy of the given path. It +// returns an error if the path is not inside a collection. +func Snapshot(fs FileSystem, path string) (*Subtree, error) { + f, err := fs.OpenFile(path, os.O_RDONLY, 0) + if err != nil { + return nil, err + } + defer f.Close() + return f.Snapshot() +} + +// Splice inserts newsubtree at the indicated target path. +// +// Splice returns an error if target is not inside a collection. 
+// +// Splice returns an error if target is an existing directory and +// newsubtree is a snapshot of a file. +func Splice(fs FileSystem, target string, newsubtree *Subtree) error { + f, err := fs.OpenFile(target, os.O_WRONLY, 0) + if os.IsNotExist(err) { + f, err = fs.OpenFile(target, os.O_CREATE|os.O_WRONLY, 0700) + } + if err != nil { + return err + } + defer f.Close() + return f.Splice(newsubtree) +} diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index d087fd0944..afe92c9911 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -457,6 +457,14 @@ func (fs *collectionFileSystem) Size() int64 { return fs.fileSystem.root.(*dirnode).TreeSize() } +func (fs *collectionFileSystem) Snapshot() (inode, error) { + return fs.fileSystem.root.Snapshot() +} + +func (fs *collectionFileSystem) Splice(r inode) error { + return fs.fileSystem.root.Splice(r) +} + // filenodePtr is an offset into a file that is (usually) efficient to // seek to. 
Specifically, if filenode.repacked==filenodePtr.repacked // then @@ -876,6 +884,47 @@ func (fn *filenode) waitPrune() { } } +func (fn *filenode) Snapshot() (inode, error) { + fn.RLock() + defer fn.RUnlock() + segments := make([]segment, 0, len(fn.segments)) + for _, seg := range fn.segments { + segments = append(segments, seg.Slice(0, seg.Len())) + } + return &filenode{ + fileinfo: fn.fileinfo, + segments: segments, + }, nil +} + +func (fn *filenode) Splice(repl inode) error { + repl, err := repl.Snapshot() + if err != nil { + return err + } + fn.parent.Lock() + defer fn.parent.Unlock() + fn.Lock() + defer fn.Unlock() + _, err = fn.parent.Child(fn.fileinfo.name, func(inode) (inode, error) { return repl, nil }) + if err != nil { + return err + } + switch repl := repl.(type) { + case *dirnode: + repl.parent = fn.parent + repl.fileinfo.name = fn.fileinfo.name + repl.setTreeFS(fn.fs) + case *filenode: + repl.parent = fn.parent + repl.fileinfo.name = fn.fileinfo.name + repl.fs = fn.fs + default: + return fmt.Errorf("cannot splice snapshot containing %T: %w", repl, ErrInvalidArgument) + } + return nil +} + type dirnode struct { fs *collectionFileSystem treenode @@ -1489,6 +1538,58 @@ func (dn *dirnode) TreeSize() (bytes int64) { return } +func (dn *dirnode) Snapshot() (inode, error) { + return dn.snapshot() +} + +func (dn *dirnode) snapshot() (*dirnode, error) { + dn.RLock() + defer dn.RUnlock() + snap := &dirnode{ + treenode: treenode{ + inodes: make(map[string]inode, len(dn.inodes)), + fileinfo: dn.fileinfo, + }, + } + for name, child := range dn.inodes { + dupchild, err := child.Snapshot() + if err != nil { + return nil, err + } + snap.inodes[name] = dupchild + dupchild.SetParent(snap, name) + } + return snap, nil +} + +func (dn *dirnode) Splice(repl inode) error { + repldn, ok := repl.(*dirnode) + if !ok { + return fmt.Errorf("cannot use Splice to replace a directory with a file: %w", ErrInvalidArgument) + } + repldn, err := repldn.snapshot() + if err != nil { + 
return err + } + dn.Lock() + defer dn.Unlock() + dn.inodes = repldn.inodes + dn.setTreeFS(dn.fs) + return nil +} + +func (dn *dirnode) setTreeFS(fs *collectionFileSystem) { + dn.fs = fs + for _, child := range dn.inodes { + switch child := child.(type) { + case *dirnode: + child.setTreeFS(fs) + case *filenode: + child.fs = fs + } + } +} + type segment interface { io.ReaderAt Len() int diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index bb6c7a2626..66a126a39c 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -113,3 +113,5 @@ func (dn *deferrednode) RUnlock() { dn.realinode().RUnloc func (dn *deferrednode) FS() FileSystem { return dn.currentinode().FS() } func (dn *deferrednode) Parent() inode { return dn.currentinode().Parent() } func (dn *deferrednode) MemorySize() int64 { return dn.currentinode().MemorySize() } +func (dn *deferrednode) Snapshot() (inode, error) { return dn.realinode().Snapshot() } +func (dn *deferrednode) Splice(repl inode) error { return dn.realinode().Splice(repl) } diff --git a/sdk/go/arvados/fs_filehandle.go b/sdk/go/arvados/fs_filehandle.go index 9af8d0ad40..4530a7b06a 100644 --- a/sdk/go/arvados/fs_filehandle.go +++ b/sdk/go/arvados/fs_filehandle.go @@ -110,3 +110,18 @@ func (f *filehandle) Sync() error { // Sync the containing filesystem. 
return f.FS().Sync() } + +func (f *filehandle) Snapshot() (*Subtree, error) { + if !f.readable { + return nil, ErrInvalidOperation + } + node, err := f.inode.Snapshot() + return &Subtree{inode: node}, err +} + +func (f *filehandle) Splice(r *Subtree) error { + if !f.writable { + return ErrReadOnlyFile + } + return f.inode.Splice(r.inode) +} diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go index 51ca88764e..9d76311800 100644 --- a/sdk/go/arvados/fs_site_test.go +++ b/sdk/go/arvados/fs_site_test.go @@ -5,8 +5,12 @@ package arvados import ( + "fmt" + "io" + "io/ioutil" "net/http" "os" + "syscall" "time" check "gopkg.in/check.v1" @@ -144,3 +148,142 @@ func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) { err = s.fs.Rename("/by_id", "/beep") c.Check(err, check.Equals, ErrInvalidArgument) } + +// Copy subtree from OS src to dst path inside fs. If src is a +// directory, dst must exist and be a directory. +func copyFromOS(fs FileSystem, dst, src string) error { + inf, err := os.Open(src) + if err != nil { + return err + } + defer inf.Close() + dirents, err := inf.Readdir(-1) + if e, ok := err.(*os.PathError); ok { + if e, ok := e.Err.(syscall.Errno); ok { + if e == syscall.ENOTDIR { + err = syscall.ENOTDIR + } + } + } + if err == syscall.ENOTDIR { + outf, err := fs.OpenFile(dst, os.O_CREATE|os.O_EXCL|os.O_TRUNC|os.O_WRONLY, 0700) + if err != nil { + return fmt.Errorf("open %s: %s", dst, err) + } + defer outf.Close() + _, err = io.Copy(outf, inf) + if err != nil { + return fmt.Errorf("%s: copying data from %s: %s", dst, src, err) + } + err = outf.Close() + if err != nil { + return err + } + } else if err != nil { + return fmt.Errorf("%s: readdir: %T %s", src, err, err) + } else { + { + d, err := fs.Open(dst) + if err != nil { + return fmt.Errorf("opendir(%s): %s", dst, err) + } + d.Close() + } + for _, ent := range dirents { + if ent.Name() == "." || ent.Name() == ".." 
{ + continue + } + dstname := dst + "/" + ent.Name() + if ent.IsDir() { + err = fs.Mkdir(dstname, 0700) + if err != nil { + return fmt.Errorf("mkdir %s: %s", dstname, err) + } + } + err = copyFromOS(fs, dstname, src+"/"+ent.Name()) + if err != nil { + return err + } + } + } + return nil +} + +func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) { + s.fs.MountProject("home", "") + + var src1 Collection + err := s.client.RequestAndDecode(&src1, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "collection": map[string]string{ + "name": "TestSnapshotSplice src1", + "owner_uuid": fixtureAProjectUUID, + }, + }) + c.Assert(err, check.IsNil) + defer s.client.RequestAndDecode(nil, "DELETE", "arvados/v1/collections/"+src1.UUID, nil, nil) + err = s.fs.Sync() + c.Assert(err, check.IsNil) + err = copyFromOS(s.fs, "/home/A Project/TestSnapshotSplice src1", "..") // arvados.git/sdk/go + c.Assert(err, check.IsNil) + + var src2 Collection + err = s.client.RequestAndDecode(&src2, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "collection": map[string]string{ + "name": "TestSnapshotSplice src2", + "owner_uuid": fixtureAProjectUUID, + }, + }) + c.Assert(err, check.IsNil) + defer s.client.RequestAndDecode(nil, "DELETE", "arvados/v1/collections/"+src2.UUID, nil, nil) + err = s.fs.Sync() + c.Assert(err, check.IsNil) + err = copyFromOS(s.fs, "/home/A Project/TestSnapshotSplice src2", "..") // arvados.git/sdk/go + c.Assert(err, check.IsNil) + + var dst Collection + err = s.client.RequestAndDecode(&dst, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "collection": map[string]string{ + "name": "TestSnapshotSplice dst", + "owner_uuid": fixtureAProjectUUID, + }, + }) + c.Assert(err, check.IsNil) + defer s.client.RequestAndDecode(nil, "DELETE", "arvados/v1/collections/"+dst.UUID, nil, nil) + err = s.fs.Sync() + c.Assert(err, check.IsNil) + err = copyFromOS(s.fs, "/home/A Project/TestSnapshotSplice dst", "..") // arvados.git/sdk/go + 
c.Assert(err, check.IsNil) + + snap1, err := Snapshot(s.fs, "/home/A Project/TestSnapshotSplice src1/ctxlog") + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst/ctxlog-copy", snap1) + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst/ctxlog-copy2", snap1) + c.Assert(err, check.IsNil) + + snap2, err := Snapshot(s.fs, "/home/A Project/TestSnapshotSplice dst/ctxlog-copy") + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst/ctxlog-copy-copy", snap2) + c.Assert(err, check.IsNil) + + snapDst, err := Snapshot(s.fs, "/home/A Project/TestSnapshotSplice dst") + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst", snapDst) + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst/copy1", snapDst) + c.Assert(err, check.IsNil) + err = Splice(s.fs, "/home/A Project/TestSnapshotSplice dst/copy2", snapDst) + c.Assert(err, check.IsNil) + err = s.fs.RemoveAll("/home/A Project/TestSnapshotSplice dst/arvados") + c.Assert(err, check.IsNil) + _, err = s.fs.Open("/home/A Project/TestSnapshotSplice dst/arvados/fs_site_test.go") + c.Assert(err, check.Equals, os.ErrNotExist) + f, err := s.fs.Open("/home/A Project/TestSnapshotSplice dst/copy2/arvados/fs_site_test.go") + c.Assert(err, check.IsNil) + defer f.Close() + buf, err := ioutil.ReadAll(f) + c.Check(err, check.IsNil) + c.Check(string(buf), check.Not(check.Equals), "") + err = f.Close() + c.Assert(err, check.IsNil) +} -- 2.30.2