1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
21 ErrReadOnlyFile = errors.New("read-only file")
22 ErrNegativeOffset = errors.New("cannot seek to negative offset")
23 ErrFileExists = errors.New("file exists")
24 ErrInvalidOperation = errors.New("invalid operation")
25 ErrInvalidArgument = errors.New("invalid argument")
26 ErrDirectoryNotEmpty = errors.New("directory not empty")
27 ErrWriteOnlyMode = errors.New("file is O_WRONLY")
28 ErrSyncNotSupported = errors.New("O_SYNC flag is not supported")
29 ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
30 ErrNotADirectory = errors.New("not a directory")
31 ErrPermission = os.ErrPermission
34 type syncer interface {
38 // A File is an *os.File-like interface for reading and writing files
46 Readdir(int) ([]os.FileInfo, error)
47 Stat() (os.FileInfo, error)
52 // A FileSystem is an http.FileSystem plus Stat() and support for
53 // opening writable files. All methods are safe to call from multiple
55 type FileSystem interface {
61 // filesystem-wide lock: used by Rename() to prevent deadlock
62 // while locking multiple inodes.
65 // throttle for limiting concurrent background writers
68 // create a new node with nil parent.
69 newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error)
71 // analogous to os.Stat()
72 Stat(name string) (os.FileInfo, error)
74 // analogous to os.Create(): create/truncate a file and open it O_RDWR.
75 Create(name string) (File, error)
77 // Like os.OpenFile(): create or open a file or directory.
79 // If flag&os.O_EXCL==0, it opens an existing file or
80 // directory if one exists. If flag&os.O_CREATE!=0, it creates
81 // a new empty file or directory if one does not already
84 // When creating a new item, perm&os.ModeDir determines
85 // whether it is a file or a directory.
87 // A file can be opened multiple times and used concurrently
88 // from multiple goroutines. However, each File object should
89 // be used by only one goroutine at a time.
90 OpenFile(name string, flag int, perm os.FileMode) (File, error)
92 Mkdir(name string, perm os.FileMode) error
93 Remove(name string) error
94 RemoveAll(name string) error
95 Rename(oldname, newname string) error
97 // Write buffered data from memory to storage, returning when
98 // all updates have been saved to persistent storage.
101 // Write buffered data from memory to storage, but don't wait
102 // for all writes to finish before returning. If shortBlocks
103 // is true, flush everything; otherwise, if there's less than
104 // a full block of buffered data at the end of a stream, leave
105 // it buffered in memory in case more data can be appended. If
106 // path is "", flush all dirs/streams; otherwise, flush only
107 // the specified dir/stream.
108 Flush(path string, shortBlocks bool) error
110 // Estimate current memory usage.
114 type inode interface {
115 SetParent(parent inode, name string)
118 Read([]byte, filenodePtr) (int, filenodePtr, error)
119 Write([]byte, filenodePtr) (int, filenodePtr, error)
120 Truncate(int64) error
122 Readdir() ([]os.FileInfo, error)
124 FileInfo() os.FileInfo
126 // Child() performs lookups and updates of named child nodes.
128 // (The term "child" here is used strictly. This means name is
129 // not "." or "..", and name does not contain "/".)
131 // If replace is non-nil, Child calls replace(x) where x is
132 // the current child inode with the given name. If possible,
133 // the child inode is replaced with the one returned by
136 // If replace(x) returns an inode (besides x or nil) that is
137 // subsequently returned by Child(), then Child()'s caller
138 // must ensure the new child's name and parent are set/updated
139 // to Child()'s name argument and its receiver respectively.
140 // This is not necessarily done before replace(x) returns, but
141 // it must be done before Child()'s caller releases the
144 // Nil represents "no child". replace(nil) signifies that no
145 // child with this name exists yet. If replace() returns nil,
146 // the existing child should be deleted if possible.
148 // An implementation of Child() is permitted to ignore
149 // replace() or its return value. For example, a regular file
150 // inode does not have children, so Child() always returns
153 // Child() returns the child, if any, with the given name: if
154 // a child was added or changed, the new child is returned.
156 // Caller must have lock (or rlock if replace is nil).
157 Child(name string, replace func(inode) (inode, error)) (inode, error)
165 type fileinfo struct {
172 // Name implements os.FileInfo.
173 func (fi fileinfo) Name() string {
177 // ModTime implements os.FileInfo.
178 func (fi fileinfo) ModTime() time.Time {
182 // Mode implements os.FileInfo.
183 func (fi fileinfo) Mode() os.FileMode {
187 // IsDir implements os.FileInfo.
188 func (fi fileinfo) IsDir() bool {
189 return fi.mode&os.ModeDir != 0
192 // Size implements os.FileInfo.
193 func (fi fileinfo) Size() int64 {
197 // Sys implements os.FileInfo.
198 func (fi fileinfo) Sys() interface{} {
// nullnode is an empty struct whose pointer-receiver methods, as far as
// they are visible in this file, refuse every operation: Mkdir, Read,
// Write, Truncate and Readdir return ErrInvalidOperation, and Child
// returns ErrNotADirectory. The note inside its MemorySize method says
// that types embedding nullnode should report their own size, so it is
// evidently meant to be embedded by other types rather than used alone.
202 type nullnode struct{}
204 func (*nullnode) Mkdir(string, os.FileMode) error {
205 return ErrInvalidOperation
208 func (*nullnode) Read([]byte, filenodePtr) (int, filenodePtr, error) {
209 return 0, filenodePtr{}, ErrInvalidOperation
212 func (*nullnode) Write([]byte, filenodePtr) (int, filenodePtr, error) {
213 return 0, filenodePtr{}, ErrInvalidOperation
216 func (*nullnode) Truncate(int64) error {
217 return ErrInvalidOperation
220 func (*nullnode) FileInfo() os.FileInfo {
224 func (*nullnode) IsDir() bool {
228 func (*nullnode) Readdir() ([]os.FileInfo, error) {
229 return nil, ErrInvalidOperation
232 func (*nullnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
233 return nil, ErrNotADirectory
236 func (*nullnode) MemorySize() int64 {
237 // Types that embed nullnode should report their own size, but
238 // if they don't, we at least report a non-zero size to ensure
239 // a large tree doesn't get reported as 0 bytes.
243 type treenode struct {
246 inodes map[string]inode
252 func (n *treenode) FS() FileSystem {
256 func (n *treenode) SetParent(p inode, name string) {
260 n.fileinfo.name = name
263 func (n *treenode) Parent() inode {
269 func (n *treenode) IsDir() bool {
273 func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) {
274 child = n.inodes[name]
275 if name == "" || name == "." || name == ".." {
276 err = ErrInvalidArgument
282 newchild, err := replace(child)
287 delete(n.inodes, name)
288 } else if newchild != child {
289 n.inodes[name] = newchild
290 n.fileinfo.modTime = time.Now()
296 func (n *treenode) Size() int64 {
297 return n.FileInfo().Size()
300 func (n *treenode) FileInfo() os.FileInfo {
303 n.fileinfo.size = int64(len(n.inodes))
307 func (n *treenode) Readdir() (fi []os.FileInfo, err error) {
310 fi = make([]os.FileInfo, 0, len(n.inodes))
311 for _, inode := range n.inodes {
312 fi = append(fi, inode.FileInfo())
317 func (n *treenode) Sync() error {
320 for _, inode := range n.inodes {
321 syncer, ok := inode.(syncer)
323 return ErrInvalidOperation
333 func (n *treenode) MemorySize() (size int64) {
336 for _, inode := range n.inodes {
337 size += inode.MemorySize()
342 type fileSystem struct {
349 func (fs *fileSystem) rootnode() inode {
353 func (fs *fileSystem) throttle() *throttle {
357 func (fs *fileSystem) locker() sync.Locker {
361 // OpenFile is analogous to os.OpenFile().
362 func (fs *fileSystem) OpenFile(name string, flag int, perm os.FileMode) (File, error) {
363 return fs.openFile(name, flag, perm)
366 func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*filehandle, error) {
367 if flag&os.O_SYNC != 0 {
368 return nil, ErrSyncNotSupported
370 dirname, name := path.Split(name)
371 parent, err := rlookup(fs.root, dirname)
375 var readable, writable bool
376 switch flag & (os.O_RDWR | os.O_RDONLY | os.O_WRONLY) {
385 return nil, fmt.Errorf("invalid flags 0x%x", flag)
387 if !writable && parent.IsDir() {
388 // A directory can be opened via "foo/", "foo/.", or
392 return &filehandle{inode: parent}, nil
394 return &filehandle{inode: parent.Parent()}, nil
397 createMode := flag&os.O_CREATE != 0
400 defer parent.Unlock()
403 defer parent.RUnlock()
405 n, err := parent.Child(name, nil)
410 return nil, os.ErrNotExist
412 n, err = parent.Child(name, func(inode) (repl inode, err error) {
413 repl, err = parent.FS().newNode(name, perm|0755, time.Now())
417 repl.SetParent(parent, name)
423 // Parent rejected new child, but returned no error
424 return nil, ErrInvalidArgument
426 } else if flag&os.O_EXCL != 0 {
427 return nil, ErrFileExists
428 } else if flag&os.O_TRUNC != 0 {
430 return nil, fmt.Errorf("invalid flag O_TRUNC in read-only mode")
431 } else if n.IsDir() {
432 return nil, fmt.Errorf("invalid flag O_TRUNC when opening directory")
433 } else if err := n.Truncate(0); err != nil {
439 append: flag&os.O_APPEND != 0,
445 func (fs *fileSystem) Open(name string) (http.File, error) {
446 return fs.OpenFile(name, os.O_RDONLY, 0)
449 func (fs *fileSystem) Create(name string) (File, error) {
450 return fs.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0)
453 func (fs *fileSystem) Mkdir(name string, perm os.FileMode) error {
454 dirname, name := path.Split(name)
455 n, err := rlookup(fs.root, dirname)
461 if child, err := n.Child(name, nil); err != nil {
463 } else if child != nil {
467 _, err = n.Child(name, func(inode) (repl inode, err error) {
468 repl, err = n.FS().newNode(name, perm|os.ModeDir, time.Now())
472 repl.SetParent(n, name)
478 func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
479 node, err := rlookup(fs.root, name)
483 return node.FileInfo(), nil
486 func (fs *fileSystem) Rename(oldname, newname string) error {
487 olddir, oldname := path.Split(oldname)
488 if oldname == "" || oldname == "." || oldname == ".." {
489 return ErrInvalidArgument
491 olddirf, err := fs.openFile(olddir+".", os.O_RDONLY, 0)
493 return fmt.Errorf("%q: %s", olddir, err)
495 defer olddirf.Close()
497 newdir, newname := path.Split(newname)
498 if newname == "." || newname == ".." {
499 return ErrInvalidArgument
500 } else if newname == "" {
501 // Rename("a/b", "c/") means Rename("a/b", "c/b")
504 newdirf, err := fs.openFile(newdir+".", os.O_RDONLY, 0)
506 return fmt.Errorf("%q: %s", newdir, err)
508 defer newdirf.Close()
510 // TODO: If the nearest common ancestor ("nca") of olddirf and
511 // newdirf is on a different filesystem than fs, we should
512 // call nca.FS().Rename() instead of proceeding. Until then
513 // it's awkward for filesystems to implement their own Rename
514 // methods effectively: the only one that runs is the one on
515 // the root FileSystem exposed to the caller (webdav, fuse,
518 // When acquiring locks on multiple inodes, avoid deadlock by
519 // locking the entire containing filesystem first.
520 cfs := olddirf.inode.FS()
522 defer cfs.locker().Unlock()
524 if cfs != newdirf.inode.FS() {
525 // Moving inodes across filesystems is not (yet)
526 // supported. Locking inodes from different
527 // filesystems could deadlock, so we must error out
529 return ErrInvalidArgument
532 // To ensure we can test reliably whether we're about to move
533 // a directory into itself, lock all potential common
534 // ancestors of olddir and newdir.
535 needLock := []sync.Locker{}
536 for _, node := range []inode{olddirf.inode, newdirf.inode} {
537 needLock = append(needLock, node)
538 for node.Parent() != node && node.Parent().FS() == node.FS() {
540 needLock = append(needLock, node)
543 locked := map[sync.Locker]bool{}
544 for i := len(needLock) - 1; i >= 0; i-- {
545 if n := needLock[i]; !locked[n] {
552 _, err = olddirf.inode.Child(oldname, func(oldinode inode) (inode, error) {
554 return oldinode, os.ErrNotExist
556 if locked[oldinode] {
557 // oldinode cannot become a descendant of itself.
558 return oldinode, ErrInvalidArgument
560 if oldinode.FS() != cfs && newdirf.inode != olddirf.inode {
561 // moving a mount point to a different parent
562 // is not (yet) supported.
563 return oldinode, ErrInvalidArgument
565 accepted, err := newdirf.inode.Child(newname, func(existing inode) (inode, error) {
566 if existing != nil && existing.IsDir() {
567 return existing, ErrIsDirectory
572 // Leave oldinode in olddir.
575 accepted.SetParent(newdirf.inode, newname)
581 func (fs *fileSystem) Remove(name string) error {
582 return fs.remove(strings.TrimRight(name, "/"), false)
585 func (fs *fileSystem) RemoveAll(name string) error {
586 err := fs.remove(strings.TrimRight(name, "/"), true)
587 if os.IsNotExist(err) {
588 // "If the path does not exist, RemoveAll returns
589 // nil." (see "os" pkg)
595 func (fs *fileSystem) remove(name string, recursive bool) error {
596 dirname, name := path.Split(name)
597 if name == "" || name == "." || name == ".." {
598 return ErrInvalidArgument
600 dir, err := rlookup(fs.root, dirname)
606 _, err = dir.Child(name, func(node inode) (inode, error) {
608 return nil, os.ErrNotExist
610 if !recursive && node.IsDir() && node.Size() > 0 {
611 return node, ErrDirectoryNotEmpty
618 func (fs *fileSystem) Sync() error {
619 if syncer, ok := fs.root.(syncer); ok {
622 return ErrInvalidOperation
625 func (fs *fileSystem) Flush(string, bool) error {
626 log.Printf("TODO: flush fileSystem")
627 return ErrInvalidOperation
630 func (fs *fileSystem) MemorySize() int64 {
631 return fs.root.MemorySize()
634 // rlookup (recursive lookup) returns the inode for the file/directory
635 // with the given name (which may contain "/" separators). If no such
636 // file/directory exists, the returned node is nil.
637 func rlookup(start inode, path string) (node inode, err error) {
639 for _, name := range strings.Split(path, "/") {
641 if name == "." || name == "" {
649 node, err = func() (inode, error) {
652 return node.Child(name, nil)
654 if node == nil || err != nil {
658 if node == nil && err == nil {
664 func permittedName(name string) bool {
665 return name != "" && name != "." && name != ".." && !strings.Contains(name, "/")