12483: Track memory use. Flush filled blocks while writing.
[arvados.git] / sdk / go / arvados / collection_fs.go
index 2f1ec2fee43c1adea2ddc775e1884cac0583c6de..79626500c10acb8c392f8432ebc2add3abff4c7a 100644 (file)
@@ -5,7 +5,6 @@
 package arvados
 
 import (
-       "crypto/md5"
        "errors"
        "fmt"
        "io"
@@ -21,11 +20,12 @@ import (
 )
 
 var (
-       ErrReadOnlyFile     = errors.New("read-only file")
-       ErrNegativeOffset   = errors.New("cannot seek to negative offset")
-       ErrFileExists       = errors.New("file exists")
-       ErrInvalidOperation = errors.New("invalid operation")
-       ErrPermission       = os.ErrPermission
+       ErrReadOnlyFile      = errors.New("read-only file")
+       ErrNegativeOffset    = errors.New("cannot seek to negative offset")
+       ErrFileExists        = errors.New("file exists")
+       ErrInvalidOperation  = errors.New("invalid operation")
+       ErrDirectoryNotEmpty = errors.New("directory not empty")
+       ErrPermission        = os.ErrPermission
 
        maxBlockSize = 1 << 26
 )
@@ -43,6 +43,7 @@ type File interface {
 
 type keepClient interface {
        ReadAt(locator string, p []byte, off int) (int, error)
+       PutB(p []byte) (string, int, error)
 }
 
 type fileinfo struct {
@@ -82,10 +83,6 @@ func (fi fileinfo) Sys() interface{} {
        return nil
 }
 
-func (fi fileinfo) Stat() os.FileInfo {
-       return fi
-}
-
 // A CollectionFileSystem is an http.Filesystem plus Stat() and
 // support for opening writable files.
 type CollectionFileSystem interface {
@@ -93,6 +90,8 @@ type CollectionFileSystem interface {
        Stat(name string) (os.FileInfo, error)
        Create(name string) (File, error)
        OpenFile(name string, flag int, perm os.FileMode) (File, error)
+       Mkdir(name string, perm os.FileMode) error
+       Remove(name string) error
        MarshalManifest(string) (string, error)
 }
 
@@ -122,13 +121,12 @@ func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
 }
 
 type inode interface {
-       os.FileInfo
-       OpenFile(string, int, os.FileMode) (*file, error)
        Parent() inode
        Read([]byte, filenodePtr) (int, filenodePtr, error)
        Write([]byte, filenodePtr) (int, filenodePtr, error)
        Truncate(int64) error
        Readdir() []os.FileInfo
+       Size() int64
        Stat() os.FileInfo
        sync.Locker
        RLock()
@@ -137,10 +135,11 @@ type inode interface {
 
 // filenode implements inode.
 type filenode struct {
-       fileinfo
+       fileinfo fileinfo
        parent   *dirnode
        extents  []extent
        repacked int64 // number of times anything in []extents has changed len
+       memsize  int64 // bytes in memExtents
        sync.RWMutex
 }
 
@@ -223,10 +222,6 @@ func (fn *filenode) appendExtent(e extent) {
        fn.fileinfo.size += int64(e.Len())
 }
 
-func (fn *filenode) OpenFile(string, int, os.FileMode) (*file, error) {
-       return nil, os.ErrNotExist
-}
-
 func (fn *filenode) Parent() inode {
        return fn.parent
 }
@@ -262,20 +257,38 @@ func (fn *filenode) Read(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr
        return
 }
 
+// Size returns the size recorded in fn.fileinfo. The value is read
+// while holding fn's read lock.
+func (fn *filenode) Size() int64 {
+       fn.RLock()
+       defer fn.RUnlock()
+       return fn.fileinfo.Size()
+}
+
+// Stat returns fn.fileinfo as an os.FileInfo. The value is taken
+// while holding fn's read lock.
+func (fn *filenode) Stat() os.FileInfo {
+       fn.RLock()
+       defer fn.RUnlock()
+       return fn.fileinfo
+}
+
 func (fn *filenode) Truncate(size int64) error {
        fn.Lock()
        defer fn.Unlock()
        if size < fn.fileinfo.size {
                ptr := fn.seek(filenodePtr{off: size, repacked: fn.repacked - 1})
+               for i := ptr.extentIdx; i < len(fn.extents); i++ {
+                       if ext, ok := fn.extents[i].(*memExtent); ok {
+                               fn.memsize -= int64(ext.Len())
+                       }
+               }
                if ptr.extentOff == 0 {
                        fn.extents = fn.extents[:ptr.extentIdx]
                } else {
                        fn.extents = fn.extents[:ptr.extentIdx+1]
-                       e := fn.extents[ptr.extentIdx]
-                       if e, ok := e.(writableExtent); ok {
-                               e.Truncate(ptr.extentOff)
-                       } else {
-                               fn.extents[ptr.extentIdx] = e.Slice(0, ptr.extentOff)
+                       switch ext := fn.extents[ptr.extentIdx].(type) {
+                       case *memExtent:
+                               ext.Truncate(ptr.extentOff)
+                               fn.memsize += int64(ext.Len())
+                       default:
+                               fn.extents[ptr.extentIdx] = ext.Slice(0, ptr.extentOff)
                        }
                }
                fn.fileinfo.size = size
@@ -300,6 +313,7 @@ func (fn *filenode) Truncate(size int64) error {
                }
                e.Truncate(e.Len() + int(grow))
                fn.fileinfo.size += grow
+               fn.memsize += grow
        }
        return nil
 }
@@ -351,6 +365,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                        prev++
                        e := &memExtent{}
                        e.Truncate(len(cando))
+                       fn.memsize += int64(len(cando))
                        fn.extents[cur] = e
                        fn.extents[prev] = fn.extents[prev].Slice(0, ptr.extentOff)
                        ptr.extentIdx++
@@ -392,6 +407,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                                ptr.extentIdx--
                                ptr.extentOff = fn.extents[prev].Len()
                                fn.extents[prev].(writableExtent).Truncate(ptr.extentOff + len(cando))
+                               fn.memsize += int64(len(cando))
                                ptr.repacked++
                                fn.repacked++
                        } else {
@@ -407,6 +423,7 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                                }
                                e := &memExtent{}
                                e.Truncate(len(cando))
+                               fn.memsize += int64(len(cando))
                                fn.extents[cur] = e
                                cur++
                                prev++
@@ -420,6 +437,9 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
 
                ptr.off += int64(len(cando))
                ptr.extentOff += len(cando)
+               if ptr.extentOff >= maxBlockSize {
+                       fn.pruneMemExtents()
+               }
                if fn.extents[ptr.extentIdx].Len() == ptr.extentOff {
                        ptr.extentOff = 0
                        ptr.extentIdx++
@@ -428,10 +448,38 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
        return
 }
 
+// pruneMemExtents reduces memory use by uploading every in-memory
+// extent that has reached maxBlockSize via fn.parent.kc.PutB and
+// replacing it with a storedExtent that refers to the returned
+// locator. Extents whose upload fails are left in memory unchanged.
+// Caller must have write lock.
+func (fn *filenode) pruneMemExtents() {
+       // TODO: async (don't hold Lock() while waiting for Keep)
+       // TODO: share code with (*dirnode)sync()
+       // TODO: pack/flush small blocks too, when fragmented
+       for idx, ext := range fn.extents {
+               // Only full-size memExtents are flushed here.
+               ext, ok := ext.(*memExtent)
+               if !ok || ext.Len() < maxBlockSize {
+                       continue
+               }
+               locator, _, err := fn.parent.kc.PutB(ext.buf)
+               if err != nil {
+                       // TODO: stall (or return errors from)
+                       // subsequent writes until flushing
+                       // starts to succeed
+                       continue
+               }
+               // The data no longer counts against the in-memory total.
+               fn.memsize -= int64(ext.Len())
+               fn.extents[idx] = storedExtent{
+                       kc:      fn.parent.kc,
+                       locator: locator,
+                       size:    ext.Len(),
+                       offset:  0,
+                       length:  ext.Len(),
+               }
+       }
+}
+
 // FileSystem returns a CollectionFileSystem for the collection.
-func (c *Collection) FileSystem(client *Client, kc keepClient) CollectionFileSystem {
+func (c *Collection) FileSystem(client *Client, kc keepClient) (CollectionFileSystem, error) {
        fs := &fileSystem{dirnode: dirnode{
-               cache:    &keepBlockCache{kc: kc},
                client:   client,
                kc:       kc,
                fileinfo: fileinfo{name: ".", mode: os.ModeDir | 0755},
@@ -439,8 +487,10 @@ func (c *Collection) FileSystem(client *Client, kc keepClient) CollectionFileSys
                inodes:   make(map[string]inode),
        }}
        fs.dirnode.parent = &fs.dirnode
-       fs.dirnode.loadManifest(c.ManifestText)
-       return fs
+       if err := fs.dirnode.loadManifest(c.ManifestText); err != nil {
+               return nil, err
+       }
+       return fs, nil
 }
 
 type file struct {
@@ -495,7 +545,7 @@ func (f *file) Write(p []byte) (n int, err error) {
 }
 
 func (f *file) Readdir(count int) ([]os.FileInfo, error) {
-       if !f.inode.IsDir() {
+       if !f.inode.Stat().IsDir() {
                return nil, ErrInvalidOperation
        }
        if count <= 0 {
@@ -516,7 +566,7 @@ func (f *file) Readdir(count int) ([]os.FileInfo, error) {
 }
 
 func (f *file) Stat() (os.FileInfo, error) {
-       return f.inode, nil
+       return f.inode.Stat(), nil
 }
 
 func (f *file) Close() error {
@@ -524,21 +574,17 @@ func (f *file) Close() error {
        return nil
 }
 
-func (f *file) OpenFile(name string, flag int, perm os.FileMode) (*file, error) {
-       return f.inode.OpenFile(name, flag, perm)
-}
-
 type dirnode struct {
-       fileinfo
-       parent *dirnode
-       client *Client
-       kc     keepClient
-       cache  blockCache
-       inodes map[string]inode
+       fileinfo fileinfo
+       parent   *dirnode
+       client   *Client
+       kc       keepClient
+       inodes   map[string]inode
        sync.RWMutex
 }
 
-// caller must hold dn.Lock().
+// sync flushes in-memory data (for all files in the tree rooted at
+// dn) to persistent storage. Caller must hold dn.Lock().
 func (dn *dirnode) sync() error {
        type shortBlock struct {
                fn  *filenode
@@ -551,28 +597,26 @@ func (dn *dirnode) sync() error {
                if len(sbs) == 0 {
                        return nil
                }
-               hash := md5.New()
-               size := 0
+               block := make([]byte, 0, maxBlockSize)
                for _, sb := range sbs {
-                       data := sb.fn.extents[sb.idx].(*memExtent).buf
-                       if _, err := hash.Write(data); err != nil {
-                               return err
-                       }
-                       size += len(data)
+                       block = append(block, sb.fn.extents[sb.idx].(*memExtent).buf...)
+               }
+               locator, _, err := dn.kc.PutB(block)
+               if err != nil {
+                       return err
                }
-               // FIXME: write to keep
-               locator := fmt.Sprintf("%x+%d", hash.Sum(nil), size)
                off := 0
                for _, sb := range sbs {
                        data := sb.fn.extents[sb.idx].(*memExtent).buf
                        sb.fn.extents[sb.idx] = storedExtent{
-                               cache:   dn.cache,
+                               kc:      dn.kc,
                                locator: locator,
-                               size:    size,
+                               size:    len(block),
                                offset:  off,
                                length:  len(data),
                        }
                        off += len(data)
+                       sb.fn.memsize -= int64(len(data))
                }
                return nil
        }
@@ -618,10 +662,11 @@ func (dn *dirnode) sync() error {
 func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
        dn.Lock()
        defer dn.Unlock()
-       if err := dn.sync(); err != nil {
-               return "", err
-       }
+       return dn.marshalManifest(prefix)
+}
 
+// caller must have write lock (dn.sync() requires dn.Lock()).
+func (dn *dirnode) marshalManifest(prefix string) (string, error) {
        var streamLen int64
        type m1segment struct {
                name   string
@@ -632,9 +677,15 @@ func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
        var subdirs string
        var blocks []string
 
+       if err := dn.sync(); err != nil {
+               return "", err
+       }
+
        names := make([]string, 0, len(dn.inodes))
-       for name := range dn.inodes {
+       for name, node := range dn.inodes {
                names = append(names, name)
+               node.Lock()
+               defer node.Unlock()
        }
        sort.Strings(names)
 
@@ -642,7 +693,7 @@ func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
                node := dn.inodes[name]
                switch node := node.(type) {
                case *dirnode:
-                       subdir, err := node.MarshalManifest(prefix + "/" + node.Name())
+                       subdir, err := node.marshalManifest(prefix + "/" + name)
                        if err != nil {
                                return "", err
                        }
@@ -650,14 +701,6 @@ func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
                case *filenode:
                        for _, e := range node.extents {
                                switch e := e.(type) {
-                               case *memExtent:
-                                       blocks = append(blocks, fmt.Sprintf("FIXME+%d", e.Len()))
-                                       segments = append(segments, m1segment{
-                                               name:   node.Name(),
-                                               offset: streamLen,
-                                               length: int64(e.Len()),
-                                       })
-                                       streamLen += int64(e.Len())
                                case storedExtent:
                                        if len(blocks) > 0 && blocks[len(blocks)-1] == e.locator {
                                                streamLen -= int64(e.size)
@@ -665,12 +708,15 @@ func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
                                                blocks = append(blocks, e.locator)
                                        }
                                        segments = append(segments, m1segment{
-                                               name:   node.Name(),
+                                               name:   name,
                                                offset: streamLen + int64(e.offset),
                                                length: int64(e.length),
                                        })
                                        streamLen += int64(e.size)
                                default:
+                                       // This can't happen: we
+                                       // haven't unlocked since
+                                       // calling sync().
                                        panic(fmt.Sprintf("can't marshal extent type %T", e))
                                }
                        }
@@ -680,36 +726,43 @@ func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
        }
        var filetokens []string
        for _, s := range segments {
-               filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, s.name))
+               filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
        }
        if len(filetokens) == 0 {
                return subdirs, nil
        } else if len(blocks) == 0 {
                blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
        }
-       return prefix + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
+       return manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
 }
 
-func (dn *dirnode) loadManifest(txt string) {
+func (dn *dirnode) loadManifest(txt string) error {
        // FIXME: faster
        var dirname string
-       for _, stream := range strings.Split(txt, "\n") {
+       streams := strings.Split(txt, "\n")
+       if streams[len(streams)-1] != "" {
+               return fmt.Errorf("line %d: no trailing newline", len(streams))
+       }
+       for i, stream := range streams[:len(streams)-1] {
+               lineno := i + 1
                var extents []storedExtent
+               var anyFileTokens bool
                for i, token := range strings.Split(stream, " ") {
                        if i == 0 {
                                dirname = manifestUnescape(token)
                                continue
                        }
                        if !strings.Contains(token, ":") {
+                               if anyFileTokens {
+                                       return fmt.Errorf("line %d: bad file segment %q", lineno, token)
+                               }
                                toks := strings.SplitN(token, "+", 3)
                                if len(toks) < 2 {
-                                       // FIXME: broken
-                                       continue
+                                       return fmt.Errorf("line %d: bad locator %q", lineno, token)
                                }
                                length, err := strconv.ParseInt(toks[1], 10, 32)
                                if err != nil || length < 0 {
-                                       // FIXME: broken
-                                       continue
+                                       return fmt.Errorf("line %d: bad locator %q", lineno, token)
                                }
                                extents = append(extents, storedExtent{
                                        locator: token,
@@ -718,33 +771,33 @@ func (dn *dirnode) loadManifest(txt string) {
                                        length:  int(length),
                                })
                                continue
+                       } else if len(extents) == 0 {
+                               return fmt.Errorf("line %d: bad locator %q", lineno, token)
                        }
+
                        toks := strings.Split(token, ":")
                        if len(toks) != 3 {
-                               // FIXME: broken manifest
-                               continue
+                               return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
+                       anyFileTokens = true
+
                        offset, err := strconv.ParseInt(toks[0], 10, 64)
                        if err != nil || offset < 0 {
-                               // FIXME: broken manifest
-                               continue
+                               return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
                        length, err := strconv.ParseInt(toks[1], 10, 64)
                        if err != nil || length < 0 {
-                               // FIXME: broken manifest
-                               continue
+                               return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
                        name := path.Clean(dirname + "/" + manifestUnescape(toks[2]))
                        dn.makeParentDirs(name)
                        f, err := dn.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0700)
                        if err != nil {
-                               // FIXME: broken
-                               continue
+                               return fmt.Errorf("line %d: cannot append to %q: %s", lineno, name, err)
                        }
                        if f.inode.Stat().IsDir() {
                                f.Close()
-                               // FIXME: broken manifest
-                               continue
+                               return fmt.Errorf("line %d: cannot append to %q: is a directory", lineno, name)
                        }
                        // Map the stream offset/range coordinates to
                        // block/offset/range coordinates and add
@@ -768,7 +821,7 @@ func (dn *dirnode) loadManifest(txt string) {
                                        blkLen = int(offset + length - pos - int64(blkOff))
                                }
                                f.inode.(*filenode).appendExtent(storedExtent{
-                                       cache:   dn.cache,
+                                       kc:      dn.kc,
                                        locator: e.locator,
                                        size:    e.size,
                                        offset:  blkOff,
@@ -777,41 +830,78 @@ func (dn *dirnode) loadManifest(txt string) {
                                pos = next
                        }
                        f.Close()
+                       if pos < offset+length {
+                               return fmt.Errorf("line %d: invalid segment in %d-byte stream: %q", lineno, pos, token)
+                       }
+               }
+               if !anyFileTokens {
+                       return fmt.Errorf("line %d: no file segments", lineno)
+               } else if len(extents) == 0 {
+                       return fmt.Errorf("line %d: no locators", lineno)
+               } else if dirname == "" {
+                       return fmt.Errorf("line %d: no stream name", lineno)
                }
        }
+       return nil
 }
 
-func (dn *dirnode) makeParentDirs(name string) {
+func (dn *dirnode) makeParentDirs(name string) (err error) {
        names := strings.Split(name, "/")
        for _, name := range names[:len(names)-1] {
-               dn.Lock()
-               defer dn.Unlock()
-               if n, ok := dn.inodes[name]; !ok {
-                       n := &dirnode{
-                               parent: dn,
-                               client: dn.client,
-                               kc:     dn.kc,
-                               fileinfo: fileinfo{
-                                       name: name,
-                                       mode: os.ModeDir | 0755,
-                               },
-                       }
-                       if dn.inodes == nil {
-                               dn.inodes = make(map[string]inode)
-                       }
-                       dn.inodes[name] = n
-                       dn.fileinfo.size++
-                       dn = n
-               } else if n, ok := n.(*dirnode); ok {
-                       dn = n
-               } else {
-                       // fail
-                       return
+               f, err := dn.mkdir(name)
+               if err != nil {
+                       return err
+               }
+               defer f.Close()
+               var ok bool
+               dn, ok = f.inode.(*dirnode)
+               if !ok {
+                       return ErrFileExists
                }
        }
+       return nil
+}
+
+// mkdir calls OpenFile with O_CREATE|O_EXCL and a directory mode
+// (os.ModeDir|0755) and returns the result as is.
+func (dn *dirnode) mkdir(name string) (*file, error) {
+       return dn.OpenFile(name, os.O_CREATE|os.O_EXCL, os.ModeDir|0755)
+}
+
+// Mkdir creates a directory called name by way of mkdir and returns
+// any error mkdir reports. perm is not consulted: mkdir always
+// requests os.ModeDir|0755.
+func (dn *dirnode) Mkdir(name string, perm os.FileMode) error {
+       f, err := dn.mkdir(name)
+       if err != nil {
+               // OpenFile returns a nil *file alongside an error, so
+               // there is nothing to close on this path.
+               return err
+       }
+       // Only the side effect (the new directory) is wanted; release
+       // the handle straight away.
+       f.Close()
+       return nil
+}
+
+// Remove deletes the entry called name (which may contain "/"
+// separators) from its parent directory.
+//
+// It returns ErrInvalidOperation if the last path component is empty,
+// "." or ".."; os.ErrNotExist if the parent path does not resolve to
+// a directory or the entry itself is missing; and
+// ErrDirectoryNotEmpty if the entry is a directory that still has
+// entries of its own.
+//
+// NOTE(review): dn.fileinfo.size is not adjusted here -- confirm
+// whether OpenFile increments it when an entry is created.
+func (dn *dirnode) Remove(name string) error {
+       dirname, name := path.Split(name)
+       if name == "" || name == "." || name == ".." {
+               return ErrInvalidOperation
+       }
+       dn, ok := dn.lookupPath(dirname).(*dirnode)
+       if !ok {
+               return os.ErrNotExist
+       }
+       dn.Lock()
+       defer dn.Unlock()
+       switch node := dn.inodes[name].(type) {
+       case nil:
+               // A missing map entry yields a nil inode.
+               return os.ErrNotExist
+       case *dirnode:
+               // Hold the child's read lock while checking emptiness
+               // and until the entry has been deleted.
+               node.RLock()
+               defer node.RUnlock()
+               if len(node.inodes) > 0 {
+                       return ErrDirectoryNotEmpty
+               }
+       }
+       delete(dn.inodes, name)
+       return nil
+}
 
 func (dn *dirnode) Parent() inode {
+       dn.RLock()
+       defer dn.RUnlock()
        return dn.parent
 }
 
@@ -833,46 +923,94 @@ func (dn *dirnode) Write(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
        return 0, ptr, ErrInvalidOperation
 }
 
+// Size returns the size recorded in dn.fileinfo. The value is read
+// while holding dn's read lock.
+func (dn *dirnode) Size() int64 {
+       dn.RLock()
+       defer dn.RUnlock()
+       return dn.fileinfo.Size()
+}
+
+// Stat returns dn.fileinfo as an os.FileInfo. The value is taken
+// while holding dn's read lock.
+func (dn *dirnode) Stat() os.FileInfo {
+       dn.RLock()
+       defer dn.RUnlock()
+       return dn.fileinfo
+}
+
 func (dn *dirnode) Truncate(int64) error {
        return ErrInvalidOperation
 }
 
+// lookupPath returns the inode for the file/directory with the given
+// name (which may contain "/" separators), resolved relative to dn.
+// If no such file/directory exists, the returned node is nil.
+//
+// Empty and "." components are skipped, and ".." moves to the current
+// node's Parent(). Each directory is read-locked only while its
+// inodes map is consulted.
+func (dn *dirnode) lookupPath(path string) (node inode) {
+       node = dn
+       for _, name := range strings.Split(path, "/") {
+               // Only a directory can have children. This also catches
+               // a nil node left by a failed lookup in the previous
+               // iteration.
+               dn, ok := node.(*dirnode)
+               if !ok {
+                       return nil
+               }
+               if name == "." || name == "" {
+                       continue
+               }
+               if name == ".." {
+                       node = node.Parent()
+                       continue
+               }
+               dn.RLock()
+               node = dn.inodes[name]
+               dn.RUnlock()
+       }
+       return
+}
+
 func (dn *dirnode) OpenFile(name string, flag int, perm os.FileMode) (*file, error) {
-       name = strings.TrimSuffix(name, "/")
-       if name == "." || name == "" {
-               return &file{inode: dn}, nil
+       dirname, name := path.Split(name)
+       dn, ok := dn.lookupPath(dirname).(*dirnode)
+       if !ok {
+               return nil, os.ErrNotExist
        }
-       if dirname, name := path.Split(name); dirname != "" {
-               // OpenFile("foo/bar/baz") =>
-               // OpenFile("foo/bar").OpenFile("baz") (or
-               // ErrNotExist, if foo/bar is a file)
-               f, err := dn.OpenFile(dirname, os.O_RDONLY, 0)
-               if err != nil {
-                       return nil, err
-               }
-               defer f.Close()
-               if dn, ok := f.inode.(*dirnode); ok {
-                       return dn.OpenFile(name, flag, perm)
-               } else {
-                       return nil, os.ErrNotExist
+       writeMode := flag&(os.O_RDWR|os.O_WRONLY|os.O_CREATE) != 0
+       if !writeMode {
+               // A directory can be opened via "foo/", "foo/.", or
+               // "foo/..".
+               switch name {
+               case ".", "":
+                       return &file{inode: dn}, nil
+               case "..":
+                       return &file{inode: dn.Parent()}, nil
                }
        }
-       dn.Lock()
-       defer dn.Unlock()
-       if name == ".." {
-               return &file{inode: dn.parent}, nil
+       createMode := flag&os.O_CREATE != 0
+       if createMode {
+               dn.Lock()
+               defer dn.Unlock()
+       } else {
+               dn.RLock()
+               defer dn.RUnlock()
        }
        n, ok := dn.inodes[name]
        if !ok {
-               if flag&os.O_CREATE == 0 {
+               if !createMode {
                        return nil, os.ErrNotExist
                }
-               n = &filenode{
-                       parent: dn,
-                       fileinfo: fileinfo{
-                               name: name,
-                               mode: 0755,
-                       },
+               if perm.IsDir() {
+                       n = &dirnode{
+                               parent: dn,
+                               client: dn.client,
+                               kc:     dn.kc,
+                               fileinfo: fileinfo{
+                                       name: name,
+                                       mode: os.ModeDir | 0755,
+                               },
+                       }
+               } else {
+                       n = &filenode{
+                               parent: dn,
+                               fileinfo: fileinfo{
+                                       name: name,
+                                       mode: 0755,
+                               },
+                       }
                }
                if dn.inodes == nil {
                        dn.inodes = make(map[string]inode)
@@ -959,7 +1097,7 @@ func (me *memExtent) ReadAt(p []byte, off int64) (n int, err error) {
 }
 
 type storedExtent struct {
-       cache   blockCache
+       kc      keepClient
        locator string
        size    int
        offset  int
@@ -986,27 +1124,13 @@ func (se storedExtent) ReadAt(p []byte, off int64) (n int, err error) {
        maxlen := se.length - int(off)
        if len(p) > maxlen {
                p = p[:maxlen]
-               n, err = se.cache.ReadAt(se.locator, p, int(off)+se.offset)
+               n, err = se.kc.ReadAt(se.locator, p, int(off)+se.offset)
                if err == nil {
                        err = io.EOF
                }
                return
        }
-       return se.cache.ReadAt(se.locator, p, int(off)+se.offset)
-}
-
-type blockCache interface {
-       ReadAt(locator string, p []byte, off int) (n int, err error)
-}
-
-type keepBlockCache struct {
-       kc keepClient
-}
-
-var scratch = make([]byte, 2<<26)
-
-func (kbc *keepBlockCache) ReadAt(locator string, p []byte, off int) (int, error) {
-       return kbc.kc.ReadAt(locator, p, off)
+       return se.kc.ReadAt(se.locator, p, int(off)+se.offset)
 }
 
 func canonicalName(name string) string {
@@ -1021,7 +1145,7 @@ func canonicalName(name string) string {
 
 var manifestEscapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
 
-func manifestUnescapeSeq(seq string) string {
+func manifestUnescapeFunc(seq string) string {
        if seq == `\\` {
                return `\`
        }
@@ -1034,5 +1158,15 @@ func manifestUnescapeSeq(seq string) string {
 }
 
 func manifestUnescape(s string) string {
-       return manifestEscapeSeq.ReplaceAllStringFunc(s, manifestUnescapeSeq)
+       return manifestEscapeSeq.ReplaceAllStringFunc(s, manifestUnescapeFunc)
+}
+
+var manifestEscapedChar = regexp.MustCompile(`[^\.\w/]`)
+
+// manifestEscapeFunc returns the escaped form of seq, a single match
+// of manifestEscapedChar: every byte is written as a backslash
+// followed by three octal digits.
+//
+// The regexp matches whole runes, so a non-ASCII match is several
+// bytes long. All of them are escaped; escaping only seq[0] would
+// drop the rest of the character.
+func manifestEscapeFunc(seq string) string {
+       escaped := make([]byte, 0, 4*len(seq))
+       for i := 0; i < len(seq); i++ {
+               escaped = append(escaped, fmt.Sprintf("\\%03o", seq[i])...)
+       }
+       return string(escaped)
+}
+
+// manifestEscape returns s with every match of manifestEscapedChar
+// replaced by the result of manifestEscapeFunc.
+func manifestEscape(s string) string {
+       return manifestEscapedChar.ReplaceAllStringFunc(s, manifestEscapeFunc)
+}