"errors"
"fmt"
"io"
+ "io/fs"
"log"
"net/http"
"os"
ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
ErrNotADirectory = errors.New("not a directory")
ErrPermission = os.ErrPermission
+ DebugLocksPanicMode = false
)
// syncer is satisfied by any type that has a Sync() error method.
type syncer interface {
Sync() error
}
+// debugPanicIfNotLocked is a debugging aid for functions whose
+// contract is "caller must hold the lock". It does nothing unless
+// DebugLocksPanicMode is true.
+//
+// When enabled, it starts a probe goroutine that tries to acquire l
+// and panics if the probe succeeds within a very short grace period,
+// which means nobody was holding the lock. If writing is true and l
+// also has RLock/RUnlock methods, the probe takes the read lock (it
+// must be unobtainable while a writer holds l); otherwise the probe
+// takes the exclusive lock.
+//
+// When the lock is held as required, the probe goroutine stays
+// blocked until the holder releases l, then acquires and immediately
+// releases it.
+func debugPanicIfNotLocked(l sync.Locker, writing bool) {
+	if !DebugLocksPanicMode {
+		return
+	}
+	// The probe reports success by closing this channel. Unlike a
+	// shared bool, the channel gives the check below a proper
+	// happens-before edge, so the detector is not itself a data
+	// race.
+	acquired := make(chan struct{})
+	if rl, ok := l.(interface {
+		RLock()
+		RUnlock()
+	}); ok && writing {
+		go func() {
+			// Fail if we can grab the read lock during an
+			// operation that purportedly has write lock.
+			rl.RLock()
+			close(acquired)
+			rl.RUnlock()
+		}()
+	} else {
+		go func() {
+			l.Lock()
+			close(acquired)
+			l.Unlock()
+		}()
+	}
+	// Deliberately tiny grace period: this check can run on every
+	// locked operation, so it only needs to give the probe a
+	// chance to be scheduled.
+	select {
+	case <-acquired:
+		panic("bug: caller-must-have-lock func called, but nobody has lock")
+	case <-time.After(100 * time.Nanosecond):
+	}
+}
+
// A File is an *os.File-like interface for reading and writing files
// in a FileSystem.
type File interface {
Stat() (os.FileInfo, error)
Truncate(int64) error
Sync() error
+ // Snapshot creates a snapshot of this file or directory
+ // tree, which can then be spliced onto a different path or
+ // into a different collection.
+ Snapshot() (*Subtree, error)
+ // Splice replaces this file or directory with the given
+ // snapshot. The target must be inside a collection: Splice
+ // returns an error if the File is a virtual file or
+ // directory such as by_id, a project directory, or
+ // .arvados#collection. Splice can replace a directory with
+ // a regular file and vice versa, except that the root
+ // directory of a collection cannot become a regular file.
+ Splice(snapshot *Subtree) error
+}
+
+// A Subtree is a detached part of a filesystem tree, as returned by
+// Snapshot, that can be spliced into a filesystem with Splice.
+type Subtree struct {
+ inode inode
}
// A FileSystem is an http.FileSystem plus Stat() and support for
MemorySize() int64
}
+// fsFS wraps a FileSystem so that its Open method has the signature
+// required by fs.FS.
+type fsFS struct {
+	FileSystem
+}
+
+// FS returns an fs.FS interface to the given FileSystem, to enable
+// the use of fs.WalkDir, etc.
+func FS(fs FileSystem) fs.FS {
+	return fsFS{FileSystem: fs}
+}
+
+// Open implements fs.FS by delegating to the wrapped FileSystem's
+// Open method.
+func (a fsFS) Open(path string) (fs.File, error) {
+	return a.FileSystem.Open(path)
+}
+
type inode interface {
SetParent(parent inode, name string)
Parent() inode
Readdir() ([]os.FileInfo, error)
Size() int64
FileInfo() os.FileInfo
+ // Create a snapshot of this node and its descendants.
+ Snapshot() (inode, error)
+ // Replace this node with a copy of the provided snapshot.
+ // Caller may provide the same snapshot to multiple Splice
+ // calls, but must not modify the snapshot concurrently.
+ Splice(inode) error
// Child() performs lookups and updates of named child nodes.
//
mode os.FileMode
size int64
modTime time.Time
+ // If not nil, sys() returns the source data structure, which
+ // can be a *Collection, *Group, or nil. Currently populated
+ // only for project dirs and top-level collection dirs. Does
+ // not stay up to date with upstream changes.
+ //
+ // Intended to support keep-web's properties-as-s3-metadata
+ // feature (https://dev.arvados.org/issues/19088).
+ sys func() interface{}
}
// Name implements os.FileInfo.
return fi.size
}
-// Sys implements os.FileInfo.
+// Sys implements os.FileInfo. It returns nil when the fileinfo has no
+// sys func; otherwise it returns whatever the sys func returns. See
+// the comment on the sys field of the fileinfo struct.
func (fi fileinfo) Sys() interface{} {
- return nil
+	if getSys := fi.sys; getSys != nil {
+		return getSys()
+	}
+	return nil
}
type nullnode struct{}
return 64
}
+// Snapshot always fails with ErrInvalidOperation: a nullnode cannot
+// be snapshotted.
+func (*nullnode) Snapshot() (inode, error) {
+ return nil, ErrInvalidOperation
+}
+
+// Splice always fails with ErrInvalidOperation: a nullnode cannot be
+// replaced with a snapshot.
+func (*nullnode) Splice(inode) error {
+ return ErrInvalidOperation
+}
+
type treenode struct {
fs FileSystem
parent inode
}
func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) {
+ debugPanicIfNotLocked(n, false)
child = n.inodes[name]
if name == "" || name == "." || name == ".." {
err = ErrInvalidArgument
return
}
if newchild == nil {
+ debugPanicIfNotLocked(n, true)
delete(n.inodes, name)
} else if newchild != child {
+ debugPanicIfNotLocked(n, true)
n.inodes[name] = newchild
n.fileinfo.modTime = time.Now()
child = newchild
}
func (n *treenode) MemorySize() (size int64) {
+ // Hold the read lock only long enough to copy the list of
+ // children, so other callers are not blocked while the whole
+ // subtree is measured. The total can therefore be inexact:
+ // e.g., an item that moves from A to B after A is measured
+ // but before B is measured is counted twice. The returned
+ // size is the children's total plus 64.
n.RLock()
- defer n.RUnlock()
+ debugPanicIfNotLocked(n, false)
+ todo := make([]inode, 0, len(n.inodes))
for _, inode := range n.inodes {
+ todo = append(todo, inode)
+ }
+ n.RUnlock()
+ for _, inode := range todo {
size += inode.MemorySize()
}
- return
+ return 64 + size
}
type fileSystem struct {
default:
return nil, fmt.Errorf("invalid flags 0x%x", flag)
}
- if !writable && parent.IsDir() {
+ if parent.IsDir() {
// A directory can be opened via "foo/", "foo/.", or
// "foo/..".
switch name {
case ".", "":
- return &filehandle{inode: parent}, nil
+ return &filehandle{inode: parent, readable: readable, writable: writable}, nil
case "..":
- return &filehandle{inode: parent.Parent()}, nil
+ return &filehandle{inode: parent.Parent(), readable: readable, writable: writable}, nil
}
}
createMode := flag&os.O_CREATE != 0
- if createMode {
- parent.Lock()
- defer parent.Unlock()
- } else {
- parent.RLock()
- defer parent.RUnlock()
- }
+ // We always need to take Lock() here, not just RLock(). Even
+ // if we know we won't be creating a file, parent might be a
+ // lookupnode, which sometimes populates its inodes map during
+ // a Child() call.
+ parent.Lock()
+ defer parent.Unlock()
n, err := parent.Child(name, nil)
if err != nil {
return nil, err
// supported. Locking inodes from different
// filesystems could deadlock, so we must error out
// now.
- return ErrInvalidArgument
+ return ErrInvalidOperation
}
// To ensure we can test reliably whether we're about to move
}
locked := map[sync.Locker]bool{}
for i := len(needLock) - 1; i >= 0; i-- {
- if n := needLock[i]; !locked[n] {
+ n := needLock[i]
+ if fs, ok := n.(interface{ rootnode() inode }); ok {
+ // Lock the fs's root dir directly, not
+ // through the fs. Otherwise our "locked" map
+ // would not reliably prevent double-locking
+ // the fs's root dir.
+ n = fs.rootnode()
+ }
+ if !locked[n] {
n.Lock()
defer n.Unlock()
locked[n] = true
}
}
node, err = func() (inode, error) {
- node.RLock()
- defer node.RUnlock()
+ node.Lock()
+ defer node.Unlock()
return node.Child(name, nil)
}()
if node == nil || err != nil {
// permittedName reports whether name is acceptable as a single path
// component: it must be non-empty, must not be "." or "..", and must
// not contain a slash.
func permittedName(name string) bool {
	switch name {
	case "", ".", "..":
		return false
	}
	return !strings.Contains(name, "/")
}
+
+// Snapshot opens path read-only on fs and returns a Subtree that is
+// a copy of the file or directory found there. It returns an error
+// if path cannot be opened or is not inside a collection.
+func Snapshot(fs FileSystem, path string) (*Subtree, error) {
+	src, err := fs.OpenFile(path, os.O_RDONLY, 0)
+	if err != nil {
+		return nil, err
+	}
+	// The handle is only needed while the snapshot is taken.
+	defer src.Close()
+	return src.Snapshot()
+}
+
+// Splice inserts newsubtree at the indicated target path. If target
+// does not exist yet, Splice first tries to create it.
+//
+// Splice returns an error if target is not inside a collection.
+//
+// Splice returns an error if target is the root of a collection and
+// newsubtree is a snapshot of a file.
+//
+// An error from opening (or creating) target is returned wrapped,
+// prefixed with "open <target>: ".
+func Splice(fs FileSystem, target string, newsubtree *Subtree) error {
+	f, err := fs.OpenFile(target, os.O_WRONLY, 0)
+	// Use errors.Is rather than os.IsNotExist so that a not-exist
+	// error wrapped with %w still triggers the create fallback.
+	if errors.Is(err, os.ErrNotExist) {
+		f, err = fs.OpenFile(target, os.O_CREATE|os.O_WRONLY, 0700)
+	}
+	if err != nil {
+		return fmt.Errorf("open %s: %w", target, err)
+	}
+	defer f.Close()
+	return f.Splice(newsubtree)
+}