12483: Speed up manifest loading.
[arvados.git] / sdk / go / arvados / collection_fs.go
index 1214de4e10268345e68b19fc6e066b3592424279..45c0731b0805bd36426261d590b4c44798f141f5 100644 (file)
@@ -24,7 +24,11 @@ var (
        ErrNegativeOffset    = errors.New("cannot seek to negative offset")
        ErrFileExists        = errors.New("file exists")
        ErrInvalidOperation  = errors.New("invalid operation")
+       ErrInvalidArgument   = errors.New("invalid argument")
        ErrDirectoryNotEmpty = errors.New("directory not empty")
+       ErrWriteOnlyMode     = errors.New("file is O_WRONLY")
+       ErrSyncNotSupported  = errors.New("O_SYNC flag is not supported")
+       ErrIsDirectory       = errors.New("cannot rename file to overwrite existing directory")
        ErrPermission        = os.ErrPermission
 
        maxBlockSize = 1 << 26
@@ -83,20 +87,38 @@ func (fi fileinfo) Sys() interface{} {
        return nil
 }
 
-func (fi fileinfo) Stat() os.FileInfo {
-       return fi
-}
-
 // A CollectionFileSystem is an http.Filesystem plus Stat() and
-// support for opening writable files.
+// support for opening writable files. All methods are safe to call
+// from multiple goroutines.
 type CollectionFileSystem interface {
        http.FileSystem
+
+       // analogous to os.Stat()
        Stat(name string) (os.FileInfo, error)
+
+       // analogous to os.Create(): create/truncate a file and open it O_RDWR.
        Create(name string) (File, error)
+
+       // Like os.OpenFile(): create or open a file or directory.
+       //
+       // If flag&os.O_EXCL==0, it opens an existing file or
+       // directory if one exists. If flag&os.O_CREATE!=0, it creates
+       // a new empty file or directory if one does not already
+       // exist.
+       //
+       // When creating a new item, perm&os.ModeDir determines
+       // whether it is a file or a directory.
+       //
+       // A file can be opened multiple times and used concurrently
+       // from multiple goroutines. However, each File object should
+       // be used by only one goroutine at a time.
        OpenFile(name string, flag int, perm os.FileMode) (File, error)
+
        Mkdir(name string, perm os.FileMode) error
        Remove(name string) error
-       MarshalManifest(string) (string, error)
+       RemoveAll(name string) error
+       Rename(oldname, newname string) error
+       MarshalManifest(prefix string) (string, error)
 }
 
 type fileSystem struct {
@@ -125,12 +147,12 @@ func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
 }
 
 type inode interface {
-       os.FileInfo
        Parent() inode
        Read([]byte, filenodePtr) (int, filenodePtr, error)
        Write([]byte, filenodePtr) (int, filenodePtr, error)
        Truncate(int64) error
        Readdir() []os.FileInfo
+       Size() int64
        Stat() os.FileInfo
        sync.Locker
        RLock()
@@ -139,10 +161,11 @@ type inode interface {
 
 // filenode implements inode.
 type filenode struct {
-       fileinfo
+       fileinfo fileinfo
        parent   *dirnode
        extents  []extent
        repacked int64 // number of times anything in []extents has changed len
+       memsize  int64 // bytes in memExtents
        sync.RWMutex
 }
 
@@ -177,7 +200,6 @@ func (fn *filenode) seek(startPtr filenodePtr) (ptr filenodePtr) {
                // meaningless anyway
                return
        } else if ptr.off >= fn.fileinfo.size {
-               ptr.off = fn.fileinfo.size
                ptr.extentIdx = len(fn.extents)
                ptr.extentOff = 0
                ptr.repacked = fn.repacked
@@ -234,8 +256,6 @@ func (fn *filenode) Readdir() []os.FileInfo {
 }
 
 func (fn *filenode) Read(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
-       fn.RLock()
-       defer fn.RUnlock()
        ptr = fn.seek(startPtr)
        if ptr.off < 0 {
                err = ErrNegativeOffset
@@ -260,24 +280,49 @@ func (fn *filenode) Read(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr
        return
 }
 
+func (fn *filenode) Size() int64 {
+       fn.RLock()
+       defer fn.RUnlock()
+       return fn.fileinfo.Size()
+}
+
+func (fn *filenode) Stat() os.FileInfo {
+       fn.RLock()
+       defer fn.RUnlock()
+       return fn.fileinfo
+}
+
 func (fn *filenode) Truncate(size int64) error {
        fn.Lock()
        defer fn.Unlock()
+       return fn.truncate(size)
+}
+
+func (fn *filenode) truncate(size int64) error {
+       if size == fn.fileinfo.size {
+               return nil
+       }
+       fn.repacked++
        if size < fn.fileinfo.size {
-               ptr := fn.seek(filenodePtr{off: size, repacked: fn.repacked - 1})
+               ptr := fn.seek(filenodePtr{off: size})
+               for i := ptr.extentIdx; i < len(fn.extents); i++ {
+                       if ext, ok := fn.extents[i].(*memExtent); ok {
+                               fn.memsize -= int64(ext.Len())
+                       }
+               }
                if ptr.extentOff == 0 {
                        fn.extents = fn.extents[:ptr.extentIdx]
                } else {
                        fn.extents = fn.extents[:ptr.extentIdx+1]
-                       e := fn.extents[ptr.extentIdx]
-                       if e, ok := e.(writableExtent); ok {
-                               e.Truncate(ptr.extentOff)
-                       } else {
-                               fn.extents[ptr.extentIdx] = e.Slice(0, ptr.extentOff)
+                       switch ext := fn.extents[ptr.extentIdx].(type) {
+                       case *memExtent:
+                               ext.Truncate(ptr.extentOff)
+                               fn.memsize += int64(ext.Len())
+                       default:
+                               fn.extents[ptr.extentIdx] = ext.Slice(0, ptr.extentOff)
                        }
                }
                fn.fileinfo.size = size
-               fn.repacked++
                return nil
        }
        for size > fn.fileinfo.size {
@@ -290,21 +335,24 @@ func (fn *filenode) Truncate(size int64) error {
                } else if e, ok = fn.extents[len(fn.extents)-1].(writableExtent); !ok || e.Len() >= maxBlockSize {
                        e = &memExtent{}
                        fn.extents = append(fn.extents, e)
-               } else {
-                       fn.repacked++
                }
                if maxgrow := int64(maxBlockSize - e.Len()); maxgrow < grow {
                        grow = maxgrow
                }
                e.Truncate(e.Len() + int(grow))
                fn.fileinfo.size += grow
+               fn.memsize += grow
        }
        return nil
 }
 
+// Caller must hold lock.
 func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
-       fn.Lock()
-       defer fn.Unlock()
+       if startPtr.off > fn.fileinfo.size {
+               if err = fn.truncate(startPtr.off); err != nil {
+                       return 0, startPtr, err
+               }
+       }
        ptr = fn.seek(startPtr)
        if ptr.off < 0 {
                err = ErrNegativeOffset
@@ -349,6 +397,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                        prev++
                        e := &memExtent{}
                        e.Truncate(len(cando))
+                       fn.memsize += int64(len(cando))
                        fn.extents[cur] = e
                        fn.extents[prev] = fn.extents[prev].Slice(0, ptr.extentOff)
                        ptr.extentIdx++
@@ -390,6 +439,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                                ptr.extentIdx--
                                ptr.extentOff = fn.extents[prev].Len()
                                fn.extents[prev].(writableExtent).Truncate(ptr.extentOff + len(cando))
+                               fn.memsize += int64(len(cando))
                                ptr.repacked++
                                fn.repacked++
                        } else {
@@ -405,6 +455,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                                }
                                e := &memExtent{}
                                e.Truncate(len(cando))
+                               fn.memsize += int64(len(cando))
                                fn.extents[cur] = e
                                cur++
                                prev++
@@ -418,6 +469,9 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
 
                ptr.off += int64(len(cando))
                ptr.extentOff += len(cando)
+               if ptr.extentOff >= maxBlockSize {
+                       fn.pruneMemExtents()
+               }
                if fn.extents[ptr.extentIdx].Len() == ptr.extentOff {
                        ptr.extentOff = 0
                        ptr.extentIdx++
@@ -426,6 +480,35 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
        return
 }
 
+// Write some data out to disk to reduce memory use. Caller must have
+// write lock.
+func (fn *filenode) pruneMemExtents() {
+       // TODO: async (don't hold Lock() while waiting for Keep)
+       // TODO: share code with (*dirnode)sync()
+       // TODO: pack/flush small blocks too, when fragmented
+       for idx, ext := range fn.extents {
+               ext, ok := ext.(*memExtent)
+               if !ok || ext.Len() < maxBlockSize {
+                       continue
+               }
+               locator, _, err := fn.parent.kc.PutB(ext.buf)
+               if err != nil {
+                       // TODO: stall (or return errors from)
+                       // subsequent writes until flushing
+                       // starts to succeed
+                       continue
+               }
+               fn.memsize -= int64(ext.Len())
+               fn.extents[idx] = storedExtent{
+                       kc:      fn.parent.kc,
+                       locator: locator,
+                       size:    ext.Len(),
+                       offset:  0,
+                       length:  ext.Len(),
+               }
+       }
+}
+
 // FileSystem returns a CollectionFileSystem for the collection.
 func (c *Collection) FileSystem(client *Client, kc keepClient) (CollectionFileSystem, error) {
        fs := &fileSystem{dirnode: dirnode{
@@ -446,11 +529,17 @@ type file struct {
        inode
        ptr        filenodePtr
        append     bool
+       readable   bool
        writable   bool
        unreaddirs []os.FileInfo
 }
 
 func (f *file) Read(p []byte) (n int, err error) {
+       if !f.readable {
+               return 0, ErrWriteOnlyMode
+       }
+       f.inode.RLock()
+       defer f.inode.RUnlock()
        n, f.ptr, err = f.inode.Read(p, f.ptr)
        return
 }
@@ -469,9 +558,6 @@ func (f *file) Seek(off int64, whence int) (pos int64, err error) {
        if ptr.off < 0 {
                return f.ptr.off, ErrNegativeOffset
        }
-       if ptr.off > size {
-               ptr.off = size
-       }
        if ptr.off != f.ptr.off {
                f.ptr = ptr
                // force filenode to recompute f.ptr fields on next
@@ -489,12 +575,22 @@ func (f *file) Write(p []byte) (n int, err error) {
        if !f.writable {
                return 0, ErrReadOnlyFile
        }
+       f.inode.Lock()
+       defer f.inode.Unlock()
+       if fn, ok := f.inode.(*filenode); ok && f.append {
+               f.ptr = filenodePtr{
+                       off:       fn.fileinfo.size,
+                       extentIdx: len(fn.extents),
+                       extentOff: 0,
+                       repacked:  fn.repacked,
+               }
+       }
        n, f.ptr, err = f.inode.Write(p, f.ptr)
        return
 }
 
 func (f *file) Readdir(count int) ([]os.FileInfo, error) {
-       if !f.inode.IsDir() {
+       if !f.inode.Stat().IsDir() {
                return nil, ErrInvalidOperation
        }
        if count <= 0 {
@@ -515,7 +611,7 @@ func (f *file) Readdir(count int) ([]os.FileInfo, error) {
 }
 
 func (f *file) Stat() (os.FileInfo, error) {
-       return f.inode, nil
+       return f.inode.Stat(), nil
 }
 
 func (f *file) Close() error {
@@ -524,15 +620,16 @@ func (f *file) Close() error {
 }
 
 type dirnode struct {
-       fileinfo
-       parent *dirnode
-       client *Client
-       kc     keepClient
-       inodes map[string]inode
+       fileinfo fileinfo
+       parent   *dirnode
+       client   *Client
+       kc       keepClient
+       inodes   map[string]inode
        sync.RWMutex
 }
 
-// caller must hold dn.Lock().
+// sync flushes in-memory data (for all files in the tree rooted at
+// dn) to persistent storage. Caller must hold dn.Lock().
 func (dn *dirnode) sync() error {
        type shortBlock struct {
                fn  *filenode
@@ -564,6 +661,7 @@ func (dn *dirnode) sync() error {
                                length:  len(data),
                        }
                        off += len(data)
+                       sb.fn.memsize -= int64(len(data))
                }
                return nil
        }
@@ -640,12 +738,16 @@ func (dn *dirnode) marshalManifest(prefix string) (string, error) {
                node := dn.inodes[name]
                switch node := node.(type) {
                case *dirnode:
-                       subdir, err := node.marshalManifest(prefix + "/" + node.Name())
+                       subdir, err := node.marshalManifest(prefix + "/" + name)
                        if err != nil {
                                return "", err
                        }
                        subdirs = subdirs + subdir
                case *filenode:
+                       if len(node.extents) == 0 {
+                               segments = append(segments, m1segment{name: name})
+                               break
+                       }
                        for _, e := range node.extents {
                                switch e := e.(type) {
                                case storedExtent:
@@ -654,11 +756,18 @@ func (dn *dirnode) marshalManifest(prefix string) (string, error) {
                                        } else {
                                                blocks = append(blocks, e.locator)
                                        }
-                                       segments = append(segments, m1segment{
-                                               name:   node.Name(),
+                                       next := m1segment{
+                                               name:   name,
                                                offset: streamLen + int64(e.offset),
                                                length: int64(e.length),
-                                       })
+                                       }
+                                       if prev := len(segments) - 1; prev >= 0 &&
+                                               segments[prev].name == name &&
+                                               segments[prev].offset+segments[prev].length == next.offset {
+                                               segments[prev].length += next.length
+                                       } else {
+                                               segments = append(segments, next)
+                                       }
                                        streamLen += int64(e.size)
                                default:
                                        // This can't happen: we
@@ -694,6 +803,8 @@ func (dn *dirnode) loadManifest(txt string) error {
                lineno := i + 1
                var extents []storedExtent
                var anyFileTokens bool
+               var pos int64
+               var extIdx int
                for i, token := range strings.Split(stream, " ") {
                        if i == 0 {
                                dirname = manifestUnescape(token)
@@ -737,26 +848,28 @@ func (dn *dirnode) loadManifest(txt string) error {
                                return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
                        name := path.Clean(dirname + "/" + manifestUnescape(toks[2]))
-                       dn.makeParentDirs(name)
-                       f, err := dn.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0700)
+                       fnode, err := dn.createFileAndParents(name)
                        if err != nil {
-                               return fmt.Errorf("line %d: cannot append to %q: %s", lineno, name, err)
-                       }
-                       if f.inode.Stat().IsDir() {
-                               f.Close()
-                               return fmt.Errorf("line %d: cannot append to %q: is a directory", lineno, name)
+                               return fmt.Errorf("line %d: cannot use path %q: %s", lineno, name, err)
                        }
                        // Map the stream offset/range coordinates to
                        // block/offset/range coordinates and add
                        // corresponding storedExtents to the filenode
-                       var pos int64
-                       for _, e := range extents {
-                               next := pos + int64(e.Len())
-                               if next < offset {
+                       if pos > offset {
+                               // Can't continue where we left off.
+                               // TODO: binary search instead of
+                               // rewinding all the way (but this
+                               // situation might be rare anyway)
+                               extIdx, pos = 0, 0
+                       }
+                       for next := int64(0); extIdx < len(extents); extIdx, pos = extIdx+1, next {
+                               e := extents[extIdx]
+                               next = pos + int64(e.Len())
+                               if next <= offset || e.Len() == 0 {
                                        pos = next
                                        continue
                                }
-                               if pos > offset+length {
+                               if pos >= offset+length {
                                        break
                                }
                                var blkOff int
@@ -767,17 +880,18 @@ func (dn *dirnode) loadManifest(txt string) error {
                                if pos+int64(blkOff+blkLen) > offset+length {
                                        blkLen = int(offset + length - pos - int64(blkOff))
                                }
-                               f.inode.(*filenode).appendExtent(storedExtent{
+                               fnode.appendExtent(storedExtent{
                                        kc:      dn.kc,
                                        locator: e.locator,
                                        size:    e.size,
                                        offset:  blkOff,
                                        length:  blkLen,
                                })
-                               pos = next
+                               if next > offset+length {
+                                       break
+                               }
                        }
-                       f.Close()
-                       if pos < offset+length {
+                       if extIdx == len(extents) && pos < offset+length {
                                return fmt.Errorf("line %d: invalid segment in %d-byte stream: %q", lineno, pos, token)
                        }
                }
@@ -792,21 +906,41 @@ func (dn *dirnode) loadManifest(txt string) error {
        return nil
 }
 
-func (dn *dirnode) makeParentDirs(name string) (err error) {
-       names := strings.Split(name, "/")
-       for _, name := range names[:len(names)-1] {
-               f, err := dn.mkdir(name)
-               if err != nil {
-                       return err
+// only safe to call from loadManifest -- no locking
+func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
+       names := strings.Split(path, "/")
+       if basename := names[len(names)-1]; basename == "" || basename == "." || basename == ".." {
+               err = fmt.Errorf("invalid filename")
+               return
+       }
+       var node inode = dn
+       for i, name := range names {
+               dn, ok := node.(*dirnode)
+               if !ok {
+                       err = ErrFileExists
+                       return
                }
-               defer f.Close()
-               var ok bool
-               dn, ok = f.inode.(*dirnode)
+               if name == "" || name == "." {
+                       continue
+               }
+               if name == ".." {
+                       node = dn.parent
+                       continue
+               }
+               node, ok = dn.inodes[name]
                if !ok {
-                       return ErrFileExists
+                       if i == len(names)-1 {
+                               fn = dn.newFilenode(name, 0755)
+                               return
+                       }
+                       node = dn.newDirnode(name, 0755)
                }
        }
-       return nil
+       var ok bool
+       if fn, ok = node.(*filenode); !ok {
+               err = ErrInvalidArgument
+       }
+       return
 }
 
 func (dn *dirnode) mkdir(name string) (*file, error) {
@@ -815,16 +949,24 @@ func (dn *dirnode) mkdir(name string) (*file, error) {
 
 func (dn *dirnode) Mkdir(name string, perm os.FileMode) error {
        f, err := dn.mkdir(name)
-       if err != nil {
-               f.Close()
+       if err == nil {
+               err = f.Close()
        }
        return err
 }
 
 func (dn *dirnode) Remove(name string) error {
+       return dn.remove(name, false)
+}
+
+func (dn *dirnode) RemoveAll(name string) error {
+       return dn.remove(name, true)
+}
+
+func (dn *dirnode) remove(name string, recursive bool) error {
        dirname, name := path.Split(name)
        if name == "" || name == "." || name == ".." {
-               return ErrInvalidOperation
+               return ErrInvalidArgument
        }
        dn, ok := dn.lookupPath(dirname).(*dirnode)
        if !ok {
@@ -838,7 +980,7 @@ func (dn *dirnode) Remove(name string) error {
        case *dirnode:
                node.RLock()
                defer node.RUnlock()
-               if len(node.inodes) > 0 {
+               if !recursive && len(node.inodes) > 0 {
                        return ErrDirectoryNotEmpty
                }
        }
@@ -846,6 +988,79 @@ func (dn *dirnode) Remove(name string) error {
        return nil
 }
 
+func (dn *dirnode) Rename(oldname, newname string) error {
+       olddir, oldname := path.Split(oldname)
+       if oldname == "" || oldname == "." || oldname == ".." {
+               return ErrInvalidArgument
+       }
+       olddirf, err := dn.OpenFile(olddir+".", os.O_RDONLY, 0)
+       if err != nil {
+               return fmt.Errorf("%q: %s", olddir, err)
+       }
+       defer olddirf.Close()
+       newdir, newname := path.Split(newname)
+       if newname == "." || newname == ".." {
+               return ErrInvalidArgument
+       } else if newname == "" {
+               // Rename("a/b", "c/") means Rename("a/b", "c/b")
+               newname = oldname
+       }
+       newdirf, err := dn.OpenFile(newdir+".", os.O_RDONLY, 0)
+       if err != nil {
+               return fmt.Errorf("%q: %s", newdir, err)
+       }
+       defer newdirf.Close()
+
+       // When acquiring locks on multiple nodes, all common
+       // ancestors must be locked first in order to avoid
+       // deadlock. This is assured by locking the path from root to
+       // newdir, then locking the path from root to olddir, skipping
+       // any already-locked nodes.
+       needLock := []sync.Locker{}
+       for _, f := range []*file{olddirf, newdirf} {
+               node := f.inode
+               needLock = append(needLock, node)
+               for node.Parent() != node {
+                       node = node.Parent()
+                       needLock = append(needLock, node)
+               }
+       }
+       locked := map[sync.Locker]bool{}
+       for i := len(needLock) - 1; i >= 0; i-- {
+               if n := needLock[i]; !locked[n] {
+                       n.Lock()
+                       defer n.Unlock()
+                       locked[n] = true
+               }
+       }
+
+       olddn := olddirf.inode.(*dirnode)
+       newdn := newdirf.inode.(*dirnode)
+       oldinode, ok := olddn.inodes[oldname]
+       if !ok {
+               return os.ErrNotExist
+       }
+       if existing, ok := newdn.inodes[newname]; ok {
+               // overwriting an existing file or dir
+               if dn, ok := existing.(*dirnode); ok {
+                       if !oldinode.Stat().IsDir() {
+                               return ErrIsDirectory
+                       }
+                       dn.RLock()
+                       defer dn.RUnlock()
+                       if len(dn.inodes) > 0 {
+                               return ErrDirectoryNotEmpty
+                       }
+               }
+       } else {
+               newdn.fileinfo.size++
+       }
+       newdn.inodes[newname] = oldinode
+       delete(olddn.inodes, oldname)
+       olddn.fileinfo.size--
+       return nil
+}
+
 func (dn *dirnode) Parent() inode {
        dn.RLock()
        defer dn.RUnlock()
@@ -870,6 +1085,18 @@ func (dn *dirnode) Write(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
        return 0, ptr, ErrInvalidOperation
 }
 
+func (dn *dirnode) Size() int64 {
+       dn.RLock()
+       defer dn.RUnlock()
+       return dn.fileinfo.Size()
+}
+
+func (dn *dirnode) Stat() os.FileInfo {
+       dn.RLock()
+       defer dn.RUnlock()
+       return dn.fileinfo
+}
+
 func (dn *dirnode) Truncate(int64) error {
        return ErrInvalidOperation
 }
@@ -898,14 +1125,63 @@ func (dn *dirnode) lookupPath(path string) (node inode) {
        return
 }
 
+func (dn *dirnode) newDirnode(name string, perm os.FileMode) *dirnode {
+       child := &dirnode{
+               parent: dn,
+               client: dn.client,
+               kc:     dn.kc,
+               fileinfo: fileinfo{
+                       name: name,
+                       mode: os.ModeDir | perm,
+               },
+       }
+       if dn.inodes == nil {
+               dn.inodes = make(map[string]inode)
+       }
+       dn.inodes[name] = child
+       dn.fileinfo.size++
+       return child
+}
+
+func (dn *dirnode) newFilenode(name string, perm os.FileMode) *filenode {
+       child := &filenode{
+               parent: dn,
+               fileinfo: fileinfo{
+                       name: name,
+                       mode: perm,
+               },
+       }
+       if dn.inodes == nil {
+               dn.inodes = make(map[string]inode)
+       }
+       dn.inodes[name] = child
+       dn.fileinfo.size++
+       return child
+}
+
+// OpenFile is analogous to os.OpenFile().
 func (dn *dirnode) OpenFile(name string, flag int, perm os.FileMode) (*file, error) {
+       if flag&os.O_SYNC != 0 {
+               return nil, ErrSyncNotSupported
+       }
        dirname, name := path.Split(name)
        dn, ok := dn.lookupPath(dirname).(*dirnode)
        if !ok {
                return nil, os.ErrNotExist
        }
-       writeMode := flag&(os.O_RDWR|os.O_WRONLY|os.O_CREATE) != 0
-       if !writeMode {
+       var readable, writable bool
+       switch flag & (os.O_RDWR | os.O_RDONLY | os.O_WRONLY) {
+       case os.O_RDWR:
+               readable = true
+               writable = true
+       case os.O_RDONLY:
+               readable = true
+       case os.O_WRONLY:
+               writable = true
+       default:
+               return nil, fmt.Errorf("invalid flags 0x%x", flag)
+       }
+       if !writable {
                // A directory can be opened via "foo/", "foo/.", or
                // "foo/..".
                switch name {
@@ -929,36 +1205,26 @@ func (dn *dirnode) OpenFile(name string, flag int, perm os.FileMode) (*file, err
                        return nil, os.ErrNotExist
                }
                if perm.IsDir() {
-                       n = &dirnode{
-                               parent: dn,
-                               client: dn.client,
-                               kc:     dn.kc,
-                               fileinfo: fileinfo{
-                                       name: name,
-                                       mode: os.ModeDir | 0755,
-                               },
-                       }
+                       n = dn.newDirnode(name, 0755)
                } else {
-                       n = &filenode{
-                               parent: dn,
-                               fileinfo: fileinfo{
-                                       name: name,
-                                       mode: 0755,
-                               },
-                       }
+                       n = dn.newFilenode(name, 0755)
                }
-               if dn.inodes == nil {
-                       dn.inodes = make(map[string]inode)
-               }
-               dn.inodes[name] = n
-               dn.fileinfo.size++
        } else if flag&os.O_EXCL != 0 {
                return nil, ErrFileExists
+       } else if flag&os.O_TRUNC != 0 {
+               if !writable {
+                       return nil, fmt.Errorf("invalid flag O_TRUNC in read-only mode")
+               } else if fn, ok := n.(*filenode); !ok {
+                       return nil, fmt.Errorf("invalid flag O_TRUNC when opening directory")
+               } else {
+                       fn.Truncate(0)
+               }
        }
        return &file{
                inode:    n,
                append:   flag&os.O_APPEND != 0,
-               writable: flag&(os.O_WRONLY|os.O_RDWR) != 0,
+               readable: readable,
+               writable: writable,
        }, nil
 }