1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
22 ErrReadOnlyFile = errors.New("read-only file")
23 ErrNegativeOffset = errors.New("cannot seek to negative offset")
24 ErrFileExists = errors.New("file exists")
25 ErrInvalidOperation = errors.New("invalid operation")
26 ErrInvalidArgument = errors.New("invalid argument")
27 ErrDirectoryNotEmpty = errors.New("directory not empty")
28 ErrWriteOnlyMode = errors.New("file is O_WRONLY")
29 ErrSyncNotSupported = errors.New("O_SYNC flag is not supported")
30 ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
31 ErrNotADirectory = errors.New("not a directory")
32 ErrPermission = os.ErrPermission
33 DebugLocksPanicMode = false
36 type syncer interface {
40 func debugPanicIfNotLocked(l sync.Locker, writing bool) {
41 if !DebugLocksPanicMode {
45 if rl, ok := l.(interface {
50 // Fail if we can grab the read lock during an
51 // operation that purportedly has write lock.
65 panic("bug: caller-must-have-lock func called, but nobody has lock")
69 // A File is an *os.File-like interface for reading and writing files
77 Readdir(int) ([]os.FileInfo, error)
78 Stat() (os.FileInfo, error)
81 // Create a snapshot of a file or directory tree, which can
82 // then be spliced onto a different path or a different
84 Snapshot() (*Subtree, error)
85 // Replace this file or directory with the given snapshot.
86 // The target must be inside a collection: Splice returns an
87 // error if the File is a virtual file or directory like
88 // by_id, a project directory, .arvados#collection,
89 // etc. Splice can replace directories with regular files and
90 // vice versa, except it cannot replace the root directory of
91 // a collection with a regular file.
92 Splice(snapshot *Subtree) error
95 // A Subtree is a detached part of a filesystem tree that can be
96 // spliced into a filesystem via (File)Splice().
101 // A FileSystem is an http.FileSystem plus Stat() and support for
102 // opening writable files. All methods are safe to call from multiple
104 type FileSystem interface {
110 // filesystem-wide lock: used by Rename() to prevent deadlock
111 // while locking multiple inodes.
114 // throttle for limiting concurrent background writers
117 // create a new node with nil parent.
118 newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error)
120 // analogous to os.Stat()
121 Stat(name string) (os.FileInfo, error)
123 // analogous to os.Create(): create/truncate a file and open it O_RDWR.
124 Create(name string) (File, error)
126 // Like os.OpenFile(): create or open a file or directory.
128 // If flag&os.O_EXCL==0, it opens an existing file or
129 // directory if one exists. If flag&os.O_CREATE!=0, it creates
130 // a new empty file or directory if one does not already
133 // When creating a new item, perm&os.ModeDir determines
134 // whether it is a file or a directory.
136 // A file can be opened multiple times and used concurrently
137 // from multiple goroutines. However, each File object should
138 // be used by only one goroutine at a time.
139 OpenFile(name string, flag int, perm os.FileMode) (File, error)
141 Mkdir(name string, perm os.FileMode) error
142 Remove(name string) error
143 RemoveAll(name string) error
144 Rename(oldname, newname string) error
146 // Write buffered data from memory to storage, returning when
147 // all updates have been saved to persistent storage.
150 // Write buffered data from memory to storage, but don't wait
151 // for all writes to finish before returning. If shortBlocks
152 // is true, flush everything; otherwise, if there's less than
153 // a full block of buffered data at the end of a stream, leave
154 // it buffered in memory in case more data can be appended. If
155 // path is "", flush all dirs/streams; otherwise, flush only
156 // the specified dir/stream.
157 Flush(path string, shortBlocks bool) error
159 // Estimate current memory usage.
167 // FS wraps the given FileSystem in an fsFS value and returns it as an
168 // fs.FS, to enable the use of fs.WalkDir, etc.
169 func FS(fs FileSystem) fs.FS { return fsFS{fs} }
170 func (fs fsFS) Open(path string) (fs.File, error) {
171 f, err := fs.FileSystem.Open(path)
175 type inode interface {
176 SetParent(parent inode, name string)
179 Read([]byte, filenodePtr) (int, filenodePtr, error)
180 Write([]byte, filenodePtr) (int, filenodePtr, error)
181 Truncate(int64) error
183 Readdir() ([]os.FileInfo, error)
185 FileInfo() os.FileInfo
186 // Create a snapshot of this node and its descendants.
187 Snapshot() (inode, error)
188 // Replace this node with a copy of the provided snapshot.
189 // Caller may provide the same snapshot to multiple Splice
190 // calls, but must not modify the snapshot concurrently.
193 // Child() performs lookups and updates of named child nodes.
195 // (The term "child" here is used strictly. This means name is
196 // not "." or "..", and name does not contain "/".)
198 // If replace is non-nil, Child calls replace(x) where x is
199 // the current child inode with the given name. If possible,
200 // the child inode is replaced with the one returned by
203 // If replace(x) returns an inode (besides x or nil) that is
204 // subsequently returned by Child(), then Child()'s caller
205 // must ensure the new child's name and parent are set/updated
206 // to Child()'s name argument and its receiver respectively.
207 // This is not necessarily done before replace(x) returns, but
208 // it must be done before Child()'s caller releases the
211 // Nil represents "no child". replace(nil) signifies that no
212 // child with this name exists yet. If replace() returns nil,
213 // the existing child should be deleted if possible.
215 // An implementation of Child() is permitted to ignore
216 // replace() or its return value. For example, a regular file
217 // inode does not have children, so Child() always returns
220 // Child() returns the child, if any, with the given name: if
221 // a child was added or changed, the new child is returned.
223 // Caller must have lock (or rlock if replace is nil).
224 Child(name string, replace func(inode) (inode, error)) (inode, error)
232 type fileinfo struct {
237 // If not nil, sys() returns the source data structure, which
238 // can be a *Collection, *Group, or nil. Currently populated
239 // only for project dirs and top-level collection dirs. Does
240 // not stay up to date with upstream changes.
242 // Intended to support keep-web's properties-as-s3-metadata
243 // feature (https://dev.arvados.org/issues/19088).
244 sys func() interface{}
247 // Name implements os.FileInfo.
248 func (fi fileinfo) Name() string {
252 // ModTime implements os.FileInfo.
253 func (fi fileinfo) ModTime() time.Time {
257 // Mode implements os.FileInfo.
258 func (fi fileinfo) Mode() os.FileMode {
262 // IsDir implements os.FileInfo.
263 func (fi fileinfo) IsDir() bool {
264 return fi.mode&os.ModeDir != 0
267 // Size implements os.FileInfo.
268 func (fi fileinfo) Size() int64 {
272 // Sys implements os.FileInfo. See comment in fileinfo struct.
273 func (fi fileinfo) Sys() interface{} {
// nullnode is an empty struct whose pointer-receiver methods supply
// fallback behavior. Of the methods visible in this file, Mkdir, Read,
// Write, Truncate, Readdir, Snapshot, and Splice report
// ErrInvalidOperation, and Child reports ErrNotADirectory. The comment
// in MemorySize indicates that other types are expected to embed
// nullnode and override methods as needed.
280 type nullnode struct{}
282 func (*nullnode) Mkdir(string, os.FileMode) error {
283 return ErrInvalidOperation
286 func (*nullnode) Read([]byte, filenodePtr) (int, filenodePtr, error) {
287 return 0, filenodePtr{}, ErrInvalidOperation
290 func (*nullnode) Write([]byte, filenodePtr) (int, filenodePtr, error) {
291 return 0, filenodePtr{}, ErrInvalidOperation
294 func (*nullnode) Truncate(int64) error {
295 return ErrInvalidOperation
298 func (*nullnode) FileInfo() os.FileInfo {
302 func (*nullnode) IsDir() bool {
306 func (*nullnode) Readdir() ([]os.FileInfo, error) {
307 return nil, ErrInvalidOperation
310 func (*nullnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
311 return nil, ErrNotADirectory
314 func (*nullnode) MemorySize() int64 {
315 // Types that embed nullnode should report their own size, but
316 // if they don't, we at least report a non-zero size to ensure
317 // a large tree doesn't get reported as 0 bytes.
321 func (*nullnode) Snapshot() (inode, error) {
322 return nil, ErrInvalidOperation
325 func (*nullnode) Splice(inode) error {
326 return ErrInvalidOperation
329 type treenode struct {
332 inodes map[string]inode
338 func (n *treenode) FS() FileSystem {
342 func (n *treenode) SetParent(p inode, name string) {
346 n.fileinfo.name = name
349 func (n *treenode) Parent() inode {
355 func (n *treenode) IsDir() bool {
359 func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) {
360 debugPanicIfNotLocked(n, false)
361 child = n.inodes[name]
362 if name == "" || name == "." || name == ".." {
363 err = ErrInvalidArgument
369 newchild, err := replace(child)
374 debugPanicIfNotLocked(n, true)
375 delete(n.inodes, name)
376 } else if newchild != child {
377 debugPanicIfNotLocked(n, true)
378 n.inodes[name] = newchild
379 n.fileinfo.modTime = time.Now()
385 func (n *treenode) Size() int64 {
386 return n.FileInfo().Size()
389 func (n *treenode) FileInfo() os.FileInfo {
392 n.fileinfo.size = int64(len(n.inodes))
396 func (n *treenode) Readdir() (fi []os.FileInfo, err error) {
399 fi = make([]os.FileInfo, 0, len(n.inodes))
400 for _, inode := range n.inodes {
401 fi = append(fi, inode.FileInfo())
406 func (n *treenode) Sync() error {
409 for _, inode := range n.inodes {
410 syncer, ok := inode.(syncer)
412 return ErrInvalidOperation
422 func (n *treenode) MemorySize() (size int64) {
425 debugPanicIfNotLocked(n, false)
426 for _, inode := range n.inodes {
427 size += inode.MemorySize()
432 type fileSystem struct {
439 func (fs *fileSystem) rootnode() inode {
443 func (fs *fileSystem) throttle() *throttle {
447 func (fs *fileSystem) locker() sync.Locker {
451 // OpenFile is analogous to os.OpenFile().
452 func (fs *fileSystem) OpenFile(name string, flag int, perm os.FileMode) (File, error) {
453 return fs.openFile(name, flag, perm)
456 func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*filehandle, error) {
457 if flag&os.O_SYNC != 0 {
458 return nil, ErrSyncNotSupported
460 dirname, name := path.Split(name)
461 parent, err := rlookup(fs.root, dirname)
465 var readable, writable bool
466 switch flag & (os.O_RDWR | os.O_RDONLY | os.O_WRONLY) {
475 return nil, fmt.Errorf("invalid flags 0x%x", flag)
478 // A directory can be opened via "foo/", "foo/.", or
482 return &filehandle{inode: parent, readable: readable, writable: writable}, nil
484 return &filehandle{inode: parent.Parent(), readable: readable, writable: writable}, nil
487 createMode := flag&os.O_CREATE != 0
488 // We always need to take Lock() here, not just RLock(). Even
489 // if we know we won't be creating a file, parent might be a
490 // lookupnode, which sometimes populates its inodes map during
493 defer parent.Unlock()
494 n, err := parent.Child(name, nil)
499 return nil, os.ErrNotExist
501 n, err = parent.Child(name, func(inode) (repl inode, err error) {
502 repl, err = parent.FS().newNode(name, perm|0755, time.Now())
506 repl.SetParent(parent, name)
512 // Parent rejected new child, but returned no error
513 return nil, ErrInvalidArgument
515 } else if flag&os.O_EXCL != 0 {
516 return nil, ErrFileExists
517 } else if flag&os.O_TRUNC != 0 {
519 return nil, fmt.Errorf("invalid flag O_TRUNC in read-only mode")
520 } else if n.IsDir() {
521 return nil, fmt.Errorf("invalid flag O_TRUNC when opening directory")
522 } else if err := n.Truncate(0); err != nil {
528 append: flag&os.O_APPEND != 0,
534 func (fs *fileSystem) Open(name string) (http.File, error) {
535 return fs.OpenFile(name, os.O_RDONLY, 0)
538 func (fs *fileSystem) Create(name string) (File, error) {
539 return fs.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0)
542 func (fs *fileSystem) Mkdir(name string, perm os.FileMode) error {
543 dirname, name := path.Split(name)
544 n, err := rlookup(fs.root, dirname)
550 if child, err := n.Child(name, nil); err != nil {
552 } else if child != nil {
556 _, err = n.Child(name, func(inode) (repl inode, err error) {
557 repl, err = n.FS().newNode(name, perm|os.ModeDir, time.Now())
561 repl.SetParent(n, name)
567 func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
568 node, err := rlookup(fs.root, name)
572 return node.FileInfo(), nil
575 func (fs *fileSystem) Rename(oldname, newname string) error {
576 olddir, oldname := path.Split(oldname)
577 if oldname == "" || oldname == "." || oldname == ".." {
578 return ErrInvalidArgument
580 olddirf, err := fs.openFile(olddir+".", os.O_RDONLY, 0)
582 return fmt.Errorf("%q: %s", olddir, err)
584 defer olddirf.Close()
586 newdir, newname := path.Split(newname)
587 if newname == "." || newname == ".." {
588 return ErrInvalidArgument
589 } else if newname == "" {
590 // Rename("a/b", "c/") means Rename("a/b", "c/b")
593 newdirf, err := fs.openFile(newdir+".", os.O_RDONLY, 0)
595 return fmt.Errorf("%q: %s", newdir, err)
597 defer newdirf.Close()
599 // TODO: If the nearest common ancestor ("nca") of olddirf and
600 // newdirf is on a different filesystem than fs, we should
601 // call nca.FS().Rename() instead of proceeding. Until then
602 // it's awkward for filesystems to implement their own Rename
603 // methods effectively: the only one that runs is the one on
604 // the root FileSystem exposed to the caller (webdav, fuse,
607 // When acquiring locks on multiple inodes, avoid deadlock by
608 // locking the entire containing filesystem first.
609 cfs := olddirf.inode.FS()
611 defer cfs.locker().Unlock()
613 if cfs != newdirf.inode.FS() {
614 // Moving inodes across filesystems is not (yet)
615 // supported. Locking inodes from different
616 // filesystems could deadlock, so we must error out
618 return ErrInvalidOperation
621 // To ensure we can test reliably whether we're about to move
622 // a directory into itself, lock all potential common
623 // ancestors of olddir and newdir.
624 needLock := []sync.Locker{}
625 for _, node := range []inode{olddirf.inode, newdirf.inode} {
626 needLock = append(needLock, node)
627 for node.Parent() != node && node.Parent().FS() == node.FS() {
629 needLock = append(needLock, node)
632 locked := map[sync.Locker]bool{}
633 for i := len(needLock) - 1; i >= 0; i-- {
634 if n := needLock[i]; !locked[n] {
641 _, err = olddirf.inode.Child(oldname, func(oldinode inode) (inode, error) {
643 return oldinode, os.ErrNotExist
645 if locked[oldinode] {
646 // oldinode cannot become a descendant of itself.
647 return oldinode, ErrInvalidArgument
649 if oldinode.FS() != cfs && newdirf.inode != olddirf.inode {
650 // moving a mount point to a different parent
651 // is not (yet) supported.
652 return oldinode, ErrInvalidArgument
654 accepted, err := newdirf.inode.Child(newname, func(existing inode) (inode, error) {
655 if existing != nil && existing.IsDir() {
656 return existing, ErrIsDirectory
661 // Leave oldinode in olddir.
664 accepted.SetParent(newdirf.inode, newname)
670 func (fs *fileSystem) Remove(name string) error {
671 return fs.remove(strings.TrimRight(name, "/"), false)
674 func (fs *fileSystem) RemoveAll(name string) error {
675 err := fs.remove(strings.TrimRight(name, "/"), true)
676 if os.IsNotExist(err) {
677 // "If the path does not exist, RemoveAll returns
678 // nil." (see "os" pkg)
684 func (fs *fileSystem) remove(name string, recursive bool) error {
685 dirname, name := path.Split(name)
686 if name == "" || name == "." || name == ".." {
687 return ErrInvalidArgument
689 dir, err := rlookup(fs.root, dirname)
695 _, err = dir.Child(name, func(node inode) (inode, error) {
697 return nil, os.ErrNotExist
699 if !recursive && node.IsDir() && node.Size() > 0 {
700 return node, ErrDirectoryNotEmpty
707 func (fs *fileSystem) Sync() error {
708 if syncer, ok := fs.root.(syncer); ok {
711 return ErrInvalidOperation
714 func (fs *fileSystem) Flush(string, bool) error {
715 log.Printf("TODO: flush fileSystem")
716 return ErrInvalidOperation
719 func (fs *fileSystem) MemorySize() int64 {
720 return fs.root.MemorySize()
723 // rlookup (recursive lookup) returns the inode for the file/directory
724 // with the given name (which may contain "/" separators). If no such
725 // file/directory exists, the returned node is nil.
726 func rlookup(start inode, path string) (node inode, err error) {
728 for _, name := range strings.Split(path, "/") {
730 if name == "." || name == "" {
738 node, err = func() (inode, error) {
741 return node.Child(name, nil)
743 if node == nil || err != nil {
747 if node == nil && err == nil {
753 func permittedName(name string) bool {
754 return name != "" && name != "." && name != ".." && !strings.Contains(name, "/")
757 // Snapshot returns a Subtree that's a copy of the given path. It
758 // returns an error if the path is not inside a collection.
759 func Snapshot(fs FileSystem, path string) (*Subtree, error) {
760 f, err := fs.OpenFile(path, os.O_RDONLY, 0)
768 // Splice inserts newsubtree at the indicated target path.
770 // Splice returns an error if target is not inside a collection.
772 // Splice returns an error if target is the root of a collection and
773 // newsubtree is a snapshot of a file.
774 func Splice(fs FileSystem, target string, newsubtree *Subtree) error {
775 f, err := fs.OpenFile(target, os.O_WRONLY, 0)
776 if os.IsNotExist(err) {
777 f, err = fs.OpenFile(target, os.O_CREATE|os.O_WRONLY, 0700)
780 return fmt.Errorf("open %s: %w", target, err)
783 return f.Splice(newsubtree)