1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
// Sentinel errors returned by collection-filesystem operations.
// NOTE(review): partial extraction — the enclosing var/const block
// headers are not visible here; confirm against the full source.
23 ErrReadOnlyFile = errors.New("read-only file")
24 ErrNegativeOffset = errors.New("cannot seek to negative offset")
25 ErrFileExists = errors.New("file exists")
26 ErrInvalidOperation = errors.New("invalid operation")
27 ErrInvalidArgument = errors.New("invalid argument")
28 ErrDirectoryNotEmpty = errors.New("directory not empty")
29 ErrWriteOnlyMode = errors.New("file is O_WRONLY")
30 ErrSyncNotSupported = errors.New("O_SYNC flag is not supported")
31 ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
// ErrPermission aliases the standard library's permission error so
// callers can compare against os.ErrPermission.
32 ErrPermission = os.ErrPermission
// maxBlockSize caps the size of a single storage block: 1<<26 bytes (64 MiB).
34 maxBlockSize = 1 << 26
// Readdir/Stat: directory-listing and metadata methods belonging to an
// interface whose declaration is not visible in this extraction
// (presumably the File interface — TODO confirm against full source).
43 Readdir(int) ([]os.FileInfo, error)
44 Stat() (os.FileInfo, error)
// keepClient is the narrow subset of the Keep storage client used by
// this package: ReadAt reads part of a stored block identified by its
// locator; PutB stores a block and returns its locator.
48 type keepClient interface {
49 ReadAt(locator string, p []byte, off int) (int, error)
50 PutB(p []byte) (string, int, error)
// fileinfo implements os.FileInfo for entries in the collection
// filesystem. (Field list is not fully visible in this extraction;
// at least name, mode, and size fields exist, per the methods below.)
53 type fileinfo struct {
60 // Name implements os.FileInfo.
61 func (fi fileinfo) Name() string {
65 // ModTime implements os.FileInfo.
66 func (fi fileinfo) ModTime() time.Time {
70 // Mode implements os.FileInfo.
71 func (fi fileinfo) Mode() os.FileMode {
75 // IsDir implements os.FileInfo.
76 func (fi fileinfo) IsDir() bool {
// An entry is a directory iff the ModeDir bit is set in its mode.
77 return fi.mode&os.ModeDir != 0
80 // Size implements os.FileInfo.
81 func (fi fileinfo) Size() int64 {
85 // Sys implements os.FileInfo.
86 func (fi fileinfo) Sys() interface{} {
90 // A CollectionFileSystem is an http.Filesystem plus Stat() and
91 // support for opening writable files. All methods are safe to call
92 // from multiple goroutines.
93 type CollectionFileSystem interface {
96 // analogous to os.Stat()
97 Stat(name string) (os.FileInfo, error)
99 // analogous to os.Create(): create/truncate a file and open it O_RDWR.
100 Create(name string) (File, error)
102 // Like os.OpenFile(): create or open a file or directory.
104 // If flag&os.O_EXCL==0, it opens an existing file or
105 // directory if one exists. If flag&os.O_CREATE!=0, it creates
106 // a new empty file or directory if one does not already
109 // When creating a new item, perm&os.ModeDir determines
110 // whether it is a file or a directory.
112 // A file can be opened multiple times and used concurrently
113 // from multiple goroutines. However, each File object should
114 // be used by only one goroutine at a time.
115 OpenFile(name string, flag int, perm os.FileMode) (File, error)
// Directory-tree manipulation, mirroring the os package equivalents.
117 Mkdir(name string, perm os.FileMode) error
118 Remove(name string) error
119 RemoveAll(name string) error
120 Rename(oldname, newname string) error
// MarshalManifest flushes pending writes and returns the collection's
// manifest text, with all stream names prefixed by the given prefix.
121 MarshalManifest(prefix string) (string, error)
// fileSystem implements CollectionFileSystem by delegating every
// operation to its root dirnode. (Struct fields not fully visible in
// this extraction.)
124 type fileSystem struct {
128 func (fs *fileSystem) OpenFile(name string, flag int, perm os.FileMode) (File, error) {
129 return fs.dirnode.OpenFile(name, flag, perm)
// Open satisfies http.FileSystem: always read-only.
132 func (fs *fileSystem) Open(name string) (http.File, error) {
133 return fs.dirnode.OpenFile(name, os.O_RDONLY, 0)
// Create opens (creating/truncating) a file for read-write, like os.Create.
136 func (fs *fileSystem) Create(name string) (File, error) {
137 return fs.dirnode.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0)
// Stat resolves name relative to the root dirnode and reports its metadata.
140 func (fs *fileSystem) Stat(name string) (fi os.FileInfo, err error) {
141 node := fs.dirnode.lookupPath(name)
// inode is the common interface implemented by filenode and dirnode.
// (Full method set not visible in this extraction.)
150 type inode interface {
152 Read([]byte, filenodePtr) (int, filenodePtr, error)
153 Write([]byte, filenodePtr) (int, filenodePtr, error)
154 Truncate(int64) error
155 Readdir() []os.FileInfo
163 // filenode implements inode.
164 type filenode struct {
// repacked is bumped whenever the extent list changes shape, which
// invalidates any filenodePtr taken before the change (see below).
168 repacked int64 // number of times anything in []extents has changed len
169 memsize int64 // bytes in memExtents
173 // filenodePtr is an offset into a file that is (usually) efficient to
174 // seek to. Specifically, if filenode.repacked==filenodePtr.repacked
175 // then filenode.extents[filenodePtr.extentIdx][filenodePtr.extentOff]
176 // corresponds to file offset filenodePtr.off. Otherwise, it is
177 // necessary to reexamine len(filenode.extents[0]) etc. to find the
178 // correct extent and offset.
179 type filenodePtr struct {
186 // seek returns a ptr that is consistent with both startPtr.off and
187 // the current state of fn. The caller must already hold fn.RLock() or
190 // If startPtr points beyond the end of the file, ptr will point to
191 // exactly the end of the file.
195 // ptr.extentIdx == len(filenode.extents) // i.e., at EOF
197 // filenode.extents[ptr.extentIdx].Len() >= ptr.extentOff
198 func (fn *filenode) seek(startPtr filenodePtr) (ptr filenodePtr) {
// Negative offsets are left as-is (invalid; handled by callers):
201 // meaningless anyway
// Fast path: at or past EOF, clamp to exactly EOF.
203 } else if ptr.off >= fn.fileinfo.size {
204 ptr.extentIdx = len(fn.extents)
206 ptr.repacked = fn.repacked
208 } else if ptr.repacked == fn.repacked {
209 // extentIdx and extentOff accurately reflect ptr.off,
210 // but might have fallen off the end of an extent
211 if ptr.extentOff >= fn.extents[ptr.extentIdx].Len() {
// Slow path: the extent list changed since ptr was taken.
218 ptr.repacked = fn.repacked
220 if ptr.off >= fn.fileinfo.size {
221 ptr.extentIdx, ptr.extentOff = len(fn.extents), 0
224 // Recompute extentIdx and extentOff. We have already
225 // established fn.fileinfo.size > ptr.off >= 0, so we don't
226 // have to deal with edge cases here.
228 for ptr.extentIdx, ptr.extentOff = 0, 0; off < ptr.off; ptr.extentIdx++ {
229 // This would panic (index out of range) if
230 // fn.fileinfo.size were larger than
231 // sum(fn.extents[i].Len()) -- but that can't happen
232 // because we have ensured fn.fileinfo.size is always
234 extLen := int64(fn.extents[ptr.extentIdx].Len())
235 if off+extLen > ptr.off {
236 ptr.extentOff = int(ptr.off - off)
// appendExtent adds e to the end of the file and grows the recorded
// file size by e.Len().
244 // caller must have lock
245 func (fn *filenode) appendExtent(e extent) {
246 fn.extents = append(fn.extents, e)
247 fn.fileinfo.size += int64(e.Len())
// Parent/Readdir satisfy the inode interface on a plain file.
// (Bodies not visible in this extraction.)
250 func (fn *filenode) Parent() inode {
254 func (fn *filenode) Readdir() []os.FileInfo {
// Read copies bytes at startPtr into p, returning the bytes read and
// an updated pointer for the next read.
258 func (fn *filenode) Read(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
259 ptr = fn.seek(startPtr)
// Negative offsets are rejected rather than clamped.
261 err = ErrNegativeOffset
// At or past the last extent means EOF.
264 if ptr.extentIdx >= len(fn.extents) {
268 n, err = fn.extents[ptr.extentIdx].ReadAt(p, int64(ptr.extentOff))
// Advance to the next extent when the current one is exhausted; an
// extent-local io.EOF is not file EOF if more extents remain.
272 if ptr.extentOff == fn.extents[ptr.extentIdx].Len() {
275 if ptr.extentIdx < len(fn.extents) && err == io.EOF {
283 func (fn *filenode) Size() int64 {
286 return fn.fileinfo.Size()
289 func (fn *filenode) Stat() os.FileInfo {
// Truncate is the locked public wrapper around truncate.
295 func (fn *filenode) Truncate(size int64) error {
298 return fn.truncate(size)
// truncate changes the file size, dropping/trimming extents when
// shrinking and appending writable memory extents when growing.
// Caller must hold the lock (per the Truncate wrapper above).
301 func (fn *filenode) truncate(size int64) error {
302 if size == fn.fileinfo.size {
306 if size < fn.fileinfo.size {
307 ptr := fn.seek(filenodePtr{off: size})
// Account for in-memory bytes that the dropped extents held.
308 for i := ptr.extentIdx; i < len(fn.extents); i++ {
309 if ext, ok := fn.extents[i].(*memExtent); ok {
310 fn.memsize -= int64(ext.Len())
313 if ptr.extentOff == 0 {
314 fn.extents = fn.extents[:ptr.extentIdx]
316 fn.extents = fn.extents[:ptr.extentIdx+1]
// The boundary extent must be trimmed: in-place for memory
// extents, via Slice for stored (immutable) extents.
317 switch ext := fn.extents[ptr.extentIdx].(type) {
319 ext.Truncate(ptr.extentOff)
320 fn.memsize += int64(ext.Len())
322 fn.extents[ptr.extentIdx] = ext.Slice(0, ptr.extentOff)
325 fn.fileinfo.size = size
// Growing: extend (or add) writable extents, each capped at maxBlockSize.
328 for size > fn.fileinfo.size {
329 grow := size - fn.fileinfo.size
332 if len(fn.extents) == 0 {
334 fn.extents = append(fn.extents, e)
335 } else if e, ok = fn.extents[len(fn.extents)-1].(writableExtent); !ok || e.Len() >= maxBlockSize {
337 fn.extents = append(fn.extents, e)
339 if maxgrow := int64(maxBlockSize - e.Len()); maxgrow < grow {
342 e.Truncate(e.Len() + int(grow))
343 fn.fileinfo.size += grow
// Write copies p into the file at startPtr, rearranging/growing the
// extent list as needed, and returns the bytes written plus an updated
// pointer. The exact splitting/merging logic below is intricate; this
// view is a partial extraction — consult the full source before editing.
349 // Caller must hold lock.
350 func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
// Writing past EOF implicitly extends the file first (like a sparse write).
351 if startPtr.off > fn.fileinfo.size {
352 if err = fn.truncate(startPtr.off); err != nil {
353 return 0, startPtr, err
356 ptr = fn.seek(startPtr)
358 err = ErrNegativeOffset
361 for len(p) > 0 && err == nil {
// Each pass writes at most one maxBlockSize chunk ("cando").
363 if len(cando) > maxBlockSize {
364 cando = cando[:maxBlockSize]
366 // Rearrange/grow fn.extents (and shrink cando if
367 // needed) such that cando can be copied to
368 // fn.extents[ptr.extentIdx] at offset ptr.extentOff.
370 prev := ptr.extentIdx - 1
372 if cur < len(fn.extents) {
373 _, curWritable = fn.extents[cur].(writableExtent)
375 var prevAppendable bool
// prev can absorb data only if it is writable and not already full.
376 if prev >= 0 && fn.extents[prev].Len() < maxBlockSize {
377 _, prevAppendable = fn.extents[prev].(writableExtent)
379 if ptr.extentOff > 0 && !curWritable {
380 // Split a non-writable block.
381 if max := fn.extents[cur].Len() - ptr.extentOff; max <= len(cando) {
382 // Truncate cur, and insert a new
385 fn.extents = append(fn.extents, nil)
386 copy(fn.extents[cur+1:], fn.extents[cur:])
388 // Split cur into two copies, truncate
389 // the one on the left, shift the one
390 // on the right, and insert a new
391 // extent between them.
392 fn.extents = append(fn.extents, nil, nil)
393 copy(fn.extents[cur+2:], fn.extents[cur:])
394 fn.extents[cur+2] = fn.extents[cur+2].Slice(ptr.extentOff+len(cando), -1)
399 e.Truncate(len(cando))
400 fn.memsize += int64(len(cando))
402 fn.extents[prev] = fn.extents[prev].Slice(0, ptr.extentOff)
407 } else if curWritable {
408 if fit := int(fn.extents[cur].Len()) - ptr.extentOff; fit < len(cando) {
413 // Shrink cando if needed to fit in prev extent.
414 if cangrow := maxBlockSize - fn.extents[prev].Len(); cangrow < len(cando) {
415 cando = cando[:cangrow]
419 if cur == len(fn.extents) {
420 // ptr is at EOF, filesize is changing.
421 fn.fileinfo.size += int64(len(cando))
422 } else if el := fn.extents[cur].Len(); el <= len(cando) {
423 // cando is long enough that we won't
424 // need cur any more. shrink cando to
425 // be exactly as long as cur
426 // (otherwise we'd accidentally shift
427 // the effective position of all
428 // extents after cur).
430 copy(fn.extents[cur:], fn.extents[cur+1:])
431 fn.extents = fn.extents[:len(fn.extents)-1]
433 // shrink cur by the same #bytes we're growing prev
434 fn.extents[cur] = fn.extents[cur].Slice(len(cando), -1)
440 ptr.extentOff = fn.extents[prev].Len()
441 fn.extents[prev].(writableExtent).Truncate(ptr.extentOff + len(cando))
442 fn.memsize += int64(len(cando))
446 // Insert an extent between prev and cur, and advance prev/cur.
447 fn.extents = append(fn.extents, nil)
448 if cur < len(fn.extents) {
449 copy(fn.extents[cur+1:], fn.extents[cur:])
453 // appending a new extent does
454 // not invalidate any ptrs
457 e.Truncate(len(cando))
458 fn.memsize += int64(len(cando))
465 // Finally we can copy bytes from cando to the current extent.
466 fn.extents[ptr.extentIdx].(writableExtent).WriteAt(cando, ptr.extentOff)
// Advance the pointer past the bytes just written.
470 ptr.off += int64(len(cando))
471 ptr.extentOff += len(cando)
472 if ptr.extentOff >= maxBlockSize {
475 if fn.extents[ptr.extentIdx].Len() == ptr.extentOff {
483 // Write some data out to disk to reduce memory use. Caller must have
// pruneMemExtents uploads full-size in-memory extents to Keep and
// replaces them with storedExtent references, reclaiming memory.
485 func (fn *filenode) pruneMemExtents() {
486 // TODO: async (don't hold Lock() while waiting for Keep)
487 // TODO: share code with (*dirnode)sync()
488 // TODO: pack/flush small blocks too, when fragmented
489 for idx, ext := range fn.extents {
// Only flush memory extents that have reached the full block size.
490 ext, ok := ext.(*memExtent)
491 if !ok || ext.Len() < maxBlockSize {
494 locator, _, err := fn.parent.kc.PutB(ext.buf)
496 // TODO: stall (or return errors from)
497 // subsequent writes until flushing
501 fn.memsize -= int64(ext.Len())
502 fn.extents[idx] = storedExtent{
512 // FileSystem returns a CollectionFileSystem for the collection.
// The root dirnode is its own parent (so ".." at the root resolves to
// the root), and the tree is populated by parsing the manifest text.
513 func (c *Collection) FileSystem(client *Client, kc keepClient) (CollectionFileSystem, error) {
514 fs := &fileSystem{dirnode: dirnode{
517 fileinfo: fileinfo{name: ".", mode: os.ModeDir | 0755},
519 inodes: make(map[string]inode),
521 fs.dirnode.parent = &fs.dirnode
522 if err := fs.dirnode.loadManifest(c.ManifestText); err != nil {
// unreaddirs holds the remaining entries for incremental Readdir calls.
// (The enclosing file struct declaration is not visible in this view.)
534 unreaddirs []os.FileInfo
// Read fills p from the current position; rejects write-only handles.
537 func (f *file) Read(p []byte) (n int, err error) {
539 return 0, ErrWriteOnlyMode
542 defer f.inode.RUnlock()
543 n, f.ptr, err = f.inode.Read(p, f.ptr)
// Seek repositions the handle; negative resulting offsets are rejected.
547 func (f *file) Seek(off int64, whence int) (pos int64, err error) {
548 size := f.inode.Size()
559 return f.ptr.off, ErrNegativeOffset
561 if ptr.off != f.ptr.off {
563 // force filenode to recompute f.ptr fields on next
567 return f.ptr.off, nil
570 func (f *file) Truncate(size int64) error {
571 return f.inode.Truncate(size)
// Write appends (in O_APPEND mode) or writes at the current position;
// rejects read-only handles.
574 func (f *file) Write(p []byte) (n int, err error) {
576 return 0, ErrReadOnlyFile
579 defer f.inode.Unlock()
// In append mode, reposition to EOF before every write.
580 if fn, ok := f.inode.(*filenode); ok && f.append {
582 off: fn.fileinfo.size,
583 extentIdx: len(fn.extents),
585 repacked: fn.repacked,
588 n, f.ptr, err = f.inode.Write(p, f.ptr)
// Readdir returns up to count entries, continuing across calls;
// count <= 0 apparently returns everything at once (per the early return).
592 func (f *file) Readdir(count int) ([]os.FileInfo, error) {
593 if !f.inode.Stat().IsDir() {
594 return nil, ErrInvalidOperation
597 return f.inode.Readdir(), nil
599 if f.unreaddirs == nil {
600 f.unreaddirs = f.inode.Readdir()
602 if len(f.unreaddirs) == 0 {
605 if count > len(f.unreaddirs) {
606 count = len(f.unreaddirs)
608 ret := f.unreaddirs[:count]
609 f.unreaddirs = f.unreaddirs[count:]
613 func (f *file) Stat() (os.FileInfo, error) {
614 return f.inode.Stat(), nil
617 func (f *file) Close() error {
// dirnode is a directory inode; inodes maps child names to child nodes.
// (Other fields are not visible in this extraction.)
622 type dirnode struct {
627 inodes map[string]inode
631 // sync flushes in-memory data (for all files in the tree rooted at
632 // dn) to persistent storage. Caller must hold dn.Lock().
633 func (dn *dirnode) sync() error {
// A shortBlock identifies one small in-memory extent (by file and index)
// awaiting packing into a shared storage block.
634 type shortBlock struct {
638 var pending []shortBlock
// flush concatenates the given extents into one block, stores it via
// PutB, then rewrites each extent as a storedExtent pointing into it.
641 flush := func(sbs []shortBlock) error {
645 block := make([]byte, 0, maxBlockSize)
646 for _, sb := range sbs {
647 block = append(block, sb.fn.extents[sb.idx].(*memExtent).buf...)
649 locator, _, err := dn.kc.PutB(block)
654 for _, sb := range sbs {
655 data := sb.fn.extents[sb.idx].(*memExtent).buf
656 sb.fn.extents[sb.idx] = storedExtent{
664 sb.fn.memsize -= int64(len(data))
// Iterate children in sorted name order for deterministic output.
669 names := make([]string, 0, len(dn.inodes))
670 for name := range dn.inodes {
671 names = append(names, name)
675 for _, name := range names {
676 fn, ok := dn.inodes[name].(*filenode)
682 for idx, ext := range fn.extents {
683 ext, ok := ext.(*memExtent)
// Large extents are flushed alone; small ones are batched ("pending")
// until they would overflow one block.
687 if ext.Len() > maxBlockSize/2 {
688 if err := flush([]shortBlock{{fn, idx}}); err != nil {
693 if pendingLen+ext.Len() > maxBlockSize {
694 if err := flush(pending); err != nil {
700 pending = append(pending, shortBlock{fn, idx})
701 pendingLen += ext.Len()
704 return flush(pending)
// MarshalManifest is the locked public wrapper around marshalManifest.
707 func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
710 return dn.marshalManifest(prefix)
713 // caller must have read lock.
// marshalManifest flushes in-memory data, then renders this directory
// (and, recursively, its subdirectories) as manifest-format text.
714 func (dn *dirnode) marshalManifest(prefix string) (string, error) {
// m1segment is one name/offset/length file token in the stream.
716 type m1segment struct {
721 var segments []m1segment
725 if err := dn.sync(); err != nil {
// Sort child names for deterministic manifest output.
729 names := make([]string, 0, len(dn.inodes))
730 for name, node := range dn.inodes {
731 names = append(names, name)
737 for _, name := range names {
738 node := dn.inodes[name]
739 switch node := node.(type) {
741 subdir, err := node.marshalManifest(prefix + "/" + name)
745 subdirs = subdirs + subdir
// A zero-extent file still needs a (zero-length) segment token.
747 if len(node.extents) == 0 {
748 segments = append(segments, m1segment{name: name})
751 for _, e := range node.extents {
752 switch e := e.(type) {
// Reuse the previous block reference when consecutive extents
// share a locator, instead of repeating it in the stream.
754 if len(blocks) > 0 && blocks[len(blocks)-1] == e.locator {
755 streamLen -= int64(e.size)
757 blocks = append(blocks, e.locator)
761 offset: streamLen + int64(e.offset),
762 length: int64(e.length),
// Coalesce with the previous segment when they are contiguous
// and belong to the same file.
764 if prev := len(segments) - 1; prev >= 0 &&
765 segments[prev].name == name &&
766 segments[prev].offset+segments[prev].length == next.offset {
767 segments[prev].length += next.length
769 segments = append(segments, next)
771 streamLen += int64(e.size)
773 // This can't happen: we
774 // haven't unlocked since
776 panic(fmt.Sprintf("can't marshal extent type %T", e))
780 panic(fmt.Sprintf("can't marshal inode type %T", node))
783 var filetokens []string
784 for _, s := range segments {
785 filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
787 if len(filetokens) == 0 {
// A stream with files but no blocks uses the empty-block locator
// (md5 of zero bytes) as a placeholder.
789 } else if len(blocks) == 0 {
790 blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
792 return manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
// loadManifest parses manifest-format text and populates the directory
// tree rooted at dn, mapping stream offsets onto block extents.
// Partial extraction — several lines of the parser are not visible here.
795 func (dn *dirnode) loadManifest(txt string) error {
// A valid manifest ends with a newline, so the final split element is "".
798 streams := strings.Split(txt, "\n")
799 if streams[len(streams)-1] != "" {
800 return fmt.Errorf("line %d: no trailing newline", len(streams))
802 var extents []storedExtent
803 for i, stream := range streams[:len(streams)-1] {
805 var anyFileTokens bool
// Reset the per-stream block list, reusing the backing array.
808 extents = extents[:0]
809 for i, token := range strings.Split(stream, " ") {
// First token: the stream (directory) name.
811 dirname = manifestUnescape(token)
// Tokens without ":" are block locators ("hash+size+hints").
814 if !strings.Contains(token, ":") {
816 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
818 toks := strings.SplitN(token, "+", 3)
820 return fmt.Errorf("line %d: bad locator %q", lineno, token)
822 length, err := strconv.ParseInt(toks[1], 10, 32)
823 if err != nil || length < 0 {
824 return fmt.Errorf("line %d: bad locator %q", lineno, token)
826 extents = append(extents, storedExtent{
// Locators must all precede file tokens within a stream.
833 } else if len(extents) == 0 {
834 return fmt.Errorf("line %d: bad locator %q", lineno, token)
// Remaining tokens: "offset:length:filename" file segments.
837 toks := strings.Split(token, ":")
839 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
843 offset, err := strconv.ParseInt(toks[0], 10, 64)
844 if err != nil || offset < 0 {
845 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
847 length, err := strconv.ParseInt(toks[1], 10, 64)
848 if err != nil || length < 0 {
849 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
851 name := dirname + "/" + manifestUnescape(toks[2])
852 fnode, err := dn.createFileAndParents(name)
854 return fmt.Errorf("line %d: cannot use path %q: %s", lineno, name, err)
856 // Map the stream offset/range coordinates to
857 // block/offset/range coordinates and add
858 // corresponding storedExtents to the filenode
860 // Can't continue where we left off.
861 // TODO: binary search instead of
862 // rewinding all the way (but this
863 // situation might be rare anyway)
866 for next := int64(0); extIdx < len(extents); extIdx, pos = extIdx+1, next {
868 next = pos + int64(e.Len())
// Skip blocks that end before this segment starts.
869 if next <= offset || e.Len() == 0 {
873 if pos >= offset+length {
878 blkOff = int(offset - pos)
880 blkLen := e.Len() - blkOff
881 if pos+int64(blkOff+blkLen) > offset+length {
882 blkLen = int(offset + length - pos - int64(blkOff))
884 fnode.appendExtent(storedExtent{
891 if next > offset+length {
// Segment extends past the end of the stream's blocks: reject.
895 if extIdx == len(extents) && pos < offset+length {
896 return fmt.Errorf("line %d: invalid segment in %d-byte stream: %q", lineno, pos, token)
// Per-stream validity checks: at least one file token, at least one
// locator, and a stream name must all be present.
900 return fmt.Errorf("line %d: no file segments", lineno)
901 } else if len(extents) == 0 {
902 return fmt.Errorf("line %d: no locators", lineno)
903 } else if dirname == "" {
904 return fmt.Errorf("line %d: no stream name", lineno)
910 // only safe to call from loadManifest -- no locking
// createFileAndParents walks path from dn, creating missing
// intermediate directories (0755) and the final filenode as needed.
911 func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
912 names := strings.Split(path, "/")
// The final path element must be a real file name.
913 if basename := names[len(names)-1]; basename == "" || basename == "." || basename == ".." {
914 err = fmt.Errorf("invalid filename")
918 for i, name := range names {
919 dn, ok := node.(*dirnode)
// Empty and "." path elements are no-ops.
924 if name == "" || name == "." {
931 node, ok = dn.inodes[name]
933 if i == len(names)-1 {
934 fn = dn.newFilenode(name, 0755)
937 node = dn.newDirnode(name, 0755)
// An existing node at the final element must be a file, not a directory.
941 if fn, ok = node.(*filenode); !ok {
942 err = ErrInvalidArgument
// mkdir creates a directory via OpenFile with O_EXCL, returning its handle.
947 func (dn *dirnode) mkdir(name string) (*file, error) {
948 return dn.OpenFile(name, os.O_CREATE|os.O_EXCL, os.ModeDir|0755)
// Mkdir creates a directory and discards the handle. NOTE(review):
// the perm argument is apparently unused here (mkdir hardcodes 0755)
// — confirm against the full source.
951 func (dn *dirnode) Mkdir(name string, perm os.FileMode) error {
952 f, err := dn.mkdir(name)
// Remove deletes a file or empty directory; RemoveAll deletes recursively.
959 func (dn *dirnode) Remove(name string) error {
960 return dn.remove(name, false)
963 func (dn *dirnode) RemoveAll(name string) error {
964 return dn.remove(name, true)
967 func (dn *dirnode) remove(name string, recursive bool) error {
968 dirname, name := path.Split(name)
969 if name == "" || name == "." || name == ".." {
970 return ErrInvalidArgument
// Resolve the parent directory; note dn is shadowed here on purpose.
972 dn, ok := dn.lookupPath(dirname).(*dirnode)
974 return os.ErrNotExist
978 switch node := dn.inodes[name].(type) {
980 return os.ErrNotExist
// Non-recursive removal of a non-empty directory is an error.
984 if !recursive && len(node.inodes) > 0 {
985 return ErrDirectoryNotEmpty
988 delete(dn.inodes, name)
// Rename moves oldname to newname, following os.Rename semantics where
// visible: overwriting an empty directory with a directory is allowed,
// overwriting a directory with a file is ErrIsDirectory.
992 func (dn *dirnode) Rename(oldname, newname string) error {
993 olddir, oldname := path.Split(oldname)
994 if oldname == "" || oldname == "." || oldname == ".." {
995 return ErrInvalidArgument
// Open the source directory via its "." entry to get its dirnode.
997 olddirf, err := dn.OpenFile(olddir+".", os.O_RDONLY, 0)
999 return fmt.Errorf("%q: %s", olddir, err)
1001 defer olddirf.Close()
1002 newdir, newname := path.Split(newname)
1003 if newname == "." || newname == ".." {
1004 return ErrInvalidArgument
1005 } else if newname == "" {
1006 // Rename("a/b", "c/") means Rename("a/b", "c/b")
1009 newdirf, err := dn.OpenFile(newdir+".", os.O_RDONLY, 0)
1011 return fmt.Errorf("%q: %s", newdir, err)
1013 defer newdirf.Close()
1015 // When acquiring locks on multiple nodes, all common
1016 // ancestors must be locked first in order to avoid
1017 // deadlock. This is assured by locking the path from root to
1018 // newdir, then locking the path from root to olddir, skipping
1019 // any already-locked nodes.
1020 needLock := []sync.Locker{}
1021 for _, f := range []*file{olddirf, newdirf} {
1023 needLock = append(needLock, node)
// Walk up to the root; the root is its own parent, which ends the loop.
1024 for node.Parent() != node {
1025 node = node.Parent()
1026 needLock = append(needLock, node)
// Lock root-first (reverse of collection order), deduplicating.
1029 locked := map[sync.Locker]bool{}
1030 for i := len(needLock) - 1; i >= 0; i-- {
1031 if n := needLock[i]; !locked[n] {
1038 olddn := olddirf.inode.(*dirnode)
1039 newdn := newdirf.inode.(*dirnode)
1040 oldinode, ok := olddn.inodes[oldname]
1042 return os.ErrNotExist
1044 if existing, ok := newdn.inodes[newname]; ok {
1045 // overwriting an existing file or dir
1046 if dn, ok := existing.(*dirnode); ok {
1047 if !oldinode.Stat().IsDir() {
1048 return ErrIsDirectory
1052 if len(dn.inodes) > 0 {
1053 return ErrDirectoryNotEmpty
// Move the inode and adjust each directory's entry count.
1057 newdn.fileinfo.size++
1059 newdn.inodes[newname] = oldinode
1060 delete(olddn.inodes, oldname)
1061 olddn.fileinfo.size--
1065 func (dn *dirnode) Parent() inode {
// Readdir snapshots the metadata of every child entry.
1071 func (dn *dirnode) Readdir() (fi []os.FileInfo) {
1074 fi = make([]os.FileInfo, 0, len(dn.inodes))
1075 for _, inode := range dn.inodes {
1076 fi = append(fi, inode.Stat())
// Read/Write/Truncate on a directory are invalid operations; they exist
// only to satisfy the inode interface.
1081 func (dn *dirnode) Read(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
1082 return 0, ptr, ErrInvalidOperation
1085 func (dn *dirnode) Write(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
1086 return 0, ptr, ErrInvalidOperation
1089 func (dn *dirnode) Size() int64 {
1092 return dn.fileinfo.Size()
1095 func (dn *dirnode) Stat() os.FileInfo {
1101 func (dn *dirnode) Truncate(int64) error {
1102 return ErrInvalidOperation
1105 // lookupPath returns the inode for the file/directory with the given
1106 // name (which may contain "/" separators), along with its parent
1107 // node. If no such file/directory exists, the returned node is nil.
1108 func (dn *dirnode) lookupPath(path string) (node inode) {
1110 for _, name := range strings.Split(path, "/") {
// Only directories can have children; anything else ends the walk.
1111 dn, ok := node.(*dirnode)
// "." and empty elements (e.g. from "a//b" or trailing "/") are no-ops.
1115 if name == "." || name == "" {
// ".." steps to the parent (the root is its own parent).
1119 node = node.Parent()
1123 node = dn.inodes[name]
// newDirnode creates a child directory node under dn and registers it
// in dn.inodes. Caller is responsible for locking (not visible here —
// TODO confirm against full source).
1129 func (dn *dirnode) newDirnode(name string, perm os.FileMode) *dirnode {
1136 mode: os.ModeDir | perm,
// Lazily initialize the child map.
1139 if dn.inodes == nil {
1140 dn.inodes = make(map[string]inode)
1142 dn.inodes[name] = child
// newFilenode creates a child file node under dn, mirroring newDirnode.
1147 func (dn *dirnode) newFilenode(name string, perm os.FileMode) *filenode {
1155 if dn.inodes == nil {
1156 dn.inodes = make(map[string]inode)
1158 dn.inodes[name] = child
1163 // OpenFile is analogous to os.OpenFile().
1164 func (dn *dirnode) OpenFile(name string, flag int, perm os.FileMode) (*file, error) {
// O_SYNC is explicitly unsupported.
1165 if flag&os.O_SYNC != 0 {
1166 return nil, ErrSyncNotSupported
1168 dirname, name := path.Split(name)
// Resolve the parent directory (dn is shadowed deliberately).
1169 dn, ok := dn.lookupPath(dirname).(*dirnode)
1171 return nil, os.ErrNotExist
1173 var readable, writable bool
1174 switch flag & (os.O_RDWR | os.O_RDONLY | os.O_WRONLY) {
1183 return nil, fmt.Errorf("invalid flags 0x%x", flag)
1186 // A directory can be opened via "foo/", "foo/.", or
// "." opens dn itself; ".." opens its parent.
1190 return &file{inode: dn}, nil
1192 return &file{inode: dn.Parent()}, nil
1195 createMode := flag&os.O_CREATE != 0
1203 n, ok := dn.inodes[name]
1206 return nil, os.ErrNotExist
// Create a directory or file depending on perm&os.ModeDir (per the
// CollectionFileSystem doc comment); permissions hardcoded to 0755 here.
1209 n = dn.newDirnode(name, 0755)
1211 n = dn.newFilenode(name, 0755)
// Existing node + O_EXCL is an error; O_TRUNC needs writability and a file.
1213 } else if flag&os.O_EXCL != 0 {
1214 return nil, ErrFileExists
1215 } else if flag&os.O_TRUNC != 0 {
1217 return nil, fmt.Errorf("invalid flag O_TRUNC in read-only mode")
1218 } else if fn, ok := n.(*filenode); !ok {
1219 return nil, fmt.Errorf("invalid flag O_TRUNC when opening directory")
1226 append: flag&os.O_APPEND != 0,
// extent is one contiguous run of file data; implemented by memExtent
// (mutable, in memory) and storedExtent (immutable, in Keep).
1232 type extent interface {
1235 // Return a new extent with a subsection of the data from this
1236 // one. length<0 means length=Len()-off.
1237 Slice(off int, length int) extent
// writableExtent adds in-place mutation to extent.
1240 type writableExtent interface {
1242 WriteAt(p []byte, off int)
// memExtent holds file data in a byte slice (buf).
1246 type memExtent struct {
1250 func (me *memExtent) Len() int {
// Slice copies the requested subrange into a fresh memExtent, so the
// result does not alias me.buf.
1254 func (me *memExtent) Slice(off, length int) extent {
1256 length = len(me.buf) - off
1258 buf := make([]byte, length)
1259 copy(buf, me.buf[off:])
1260 return &memExtent{buf: buf}
// Truncate resizes the extent to n bytes, reallocating (with headroom)
// when growing past capacity.
1263 func (me *memExtent) Truncate(n int) {
1264 if n > cap(me.buf) {
// Grow capacity geometrically (×4 steps) to amortize reallocation.
1267 newsize = newsize << 2
1269 newbuf := make([]byte, n, newsize)
1270 copy(newbuf, me.buf)
1273 // Zero unused part when shrinking, in case we grow
1274 // and start using it again later.
1275 for i := n; i < len(me.buf); i++ {
// WriteAt copies p into the buffer at off; the caller must have sized
// the extent first (out-of-range writes are a programming error).
1282 func (me *memExtent) WriteAt(p []byte, off int) {
1283 if off+len(p) > len(me.buf) {
1284 panic("overflowed extent")
1286 copy(me.buf[off:], p)
// ReadAt copies from the buffer at off into p.
1289 func (me *memExtent) ReadAt(p []byte, off int64) (n int, err error) {
1290 if off > int64(me.Len()) {
1294 n = copy(p, me.buf[int(off):])
// storedExtent references a byte range (offset/length) within a block
// already stored in Keep, identified by locator. (Full field list not
// visible in this extraction.)
1301 type storedExtent struct {
1309 func (se storedExtent) Len() int {
// Slice narrows the view without copying: value semantics mean the
// returned copy just carries adjusted offset/length.
1313 func (se storedExtent) Slice(n, size int) extent {
1316 if size >= 0 && se.length > size {
// ReadAt fetches bytes from Keep via the client, translating the
// extent-relative offset into a block-relative one.
1322 func (se storedExtent) ReadAt(p []byte, off int64) (n int, err error) {
1323 if off > int64(se.length) {
// Clamp p so the read cannot run past this extent's slice of the block.
1326 maxlen := se.length - int(off)
1327 if len(p) > maxlen {
1329 n, err = se.kc.ReadAt(se.locator, p, int(off)+se.offset)
1335 return se.kc.ReadAt(se.locator, p, int(off)+se.offset)
// canonicalName normalizes a path with path.Clean rooted at "/".
1338 func canonicalName(name string) string {
1339 name = path.Clean("/" + name)
1340 if name == "/" || name == "./" {
1342 } else if strings.HasPrefix(name, "/") {
// manifestEscapeSeq matches manifest escape sequences: "\\" followed by
// three octal digits, or an escaped backslash.
1348 var manifestEscapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
// manifestUnescapeFunc decodes one matched escape sequence; invalid
// octal values are returned unchanged.
1350 func manifestUnescapeFunc(seq string) string {
1354 i, err := strconv.ParseUint(seq[1:], 8, 8)
1356 // Invalid escape sequence: can't unescape.
1359 return string([]byte{byte(i)})
// manifestUnescape decodes all escape sequences in s.
1362 func manifestUnescape(s string) string {
1363 return manifestEscapeSeq.ReplaceAllStringFunc(s, manifestUnescapeFunc)
// manifestEscapedChar matches bytes that must be escaped in manifest
// names: anything other than ".", word characters, and "/".
1366 var manifestEscapedChar = regexp.MustCompile(`[^\.\w/]`)
// manifestEscapeFunc encodes one byte as a three-digit octal escape.
1368 func manifestEscapeFunc(seq string) string {
1369 return fmt.Sprintf("\\%03o", byte(seq[0]))
// manifestEscape encodes all unsafe bytes in s.
1372 func manifestEscape(s string) string {
1373 return manifestEscapedChar.ReplaceAllStringFunc(s, manifestEscapeFunc)