1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
// Sentinel errors returned by collection-filesystem operations; callers
// compare against these to distinguish failure modes.
23 ErrReadOnlyFile = errors.New("read-only file")
24 ErrNegativeOffset = errors.New("cannot seek to negative offset")
25 ErrFileExists = errors.New("file exists")
26 ErrInvalidOperation = errors.New("invalid operation")
27 ErrInvalidArgument = errors.New("invalid argument")
28 ErrDirectoryNotEmpty = errors.New("directory not empty")
29 ErrWriteOnlyMode = errors.New("file is O_WRONLY")
30 ErrSyncNotSupported = errors.New("O_SYNC flag is not supported")
31 ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
32 ErrPermission = os.ErrPermission
// maxBlockSize is 1<<26 == 64 MiB: the upper bound used throughout for
// a single in-memory extent / stored block.
34 maxBlockSize = 1 << 26
// Readdir/Stat below belong to an interface whose opening lines are not
// visible here — presumably the exported File interface; confirm upstream.
43 Readdir(int) ([]os.FileInfo, error)
44 Stat() (os.FileInfo, error)
// keepClient is the minimal Keep-store surface this package depends on:
// random-access reads of a stored block, and whole-buffer block writes
// that return a locator.
48 type keepClient interface {
49 ReadAt(locator string, p []byte, off int) (int, error)
50 PutB(p []byte) (string, int, error)
// fileinfo is a plain value type implementing os.FileInfo for both
// files and directories in the collection filesystem.
53 type fileinfo struct {
60 // Name implements os.FileInfo.
61 func (fi fileinfo) Name() string {
65 // ModTime implements os.FileInfo.
66 func (fi fileinfo) ModTime() time.Time {
70 // Mode implements os.FileInfo.
71 func (fi fileinfo) Mode() os.FileMode {
75 // IsDir implements os.FileInfo.
76 func (fi fileinfo) IsDir() bool {
// Directories are marked by os.ModeDir in the mode bits.
77 return fi.mode&os.ModeDir != 0
80 // Size implements os.FileInfo.
81 func (fi fileinfo) Size() int64 {
85 // Sys implements os.FileInfo.
86 func (fi fileinfo) Sys() interface{} {
90 // A CollectionFileSystem is an http.Filesystem plus Stat() and
91 // support for opening writable files. All methods are safe to call
92 // from multiple goroutines.
93 type CollectionFileSystem interface {
96 // analogous to os.Stat()
97 Stat(name string) (os.FileInfo, error)
99 // analogous to os.Create(): create/truncate a file and open it O_RDWR.
100 Create(name string) (File, error)
102 // Like os.OpenFile(): create or open a file or directory.
104 // If flag&os.O_EXCL==0, it opens an existing file or
105 // directory if one exists. If flag&os.O_CREATE!=0, it creates
106 // a new empty file or directory if one does not already
109 // When creating a new item, perm&os.ModeDir determines
110 // whether it is a file or a directory.
112 // A file can be opened multiple times and used concurrently
113 // from multiple goroutines. However, each File object should
114 // be used by only one goroutine at a time.
115 OpenFile(name string, flag int, perm os.FileMode) (File, error)
117 Mkdir(name string, perm os.FileMode) error
118 Remove(name string) error
119 RemoveAll(name string) error
120 Rename(oldname, newname string) error
// MarshalManifest flushes buffered data and renders the tree as a
// manifest text, with every stream name prefixed by prefix.
121 MarshalManifest(prefix string) (string, error)
// fileSystem implements CollectionFileSystem by delegating every
// operation to its embedded root dirnode, after path.Clean()ing names.
124 type fileSystem struct {
128 func (fs *fileSystem) OpenFile(name string, flag int, perm os.FileMode) (File, error) {
129 return fs.dirnode.OpenFile(path.Clean(name), flag, perm)
// Open satisfies http.FileSystem: always read-only.
132 func (fs *fileSystem) Open(name string) (http.File, error) {
133 return fs.dirnode.OpenFile(path.Clean(name), os.O_RDONLY, 0)
// Create opens with create+truncate semantics, like os.Create.
136 func (fs *fileSystem) Create(name string) (File, error) {
137 return fs.dirnode.OpenFile(path.Clean(name), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0)
// Stat opens the named entry read-only and reports its FileInfo.
140 func (fs *fileSystem) Stat(name string) (os.FileInfo, error) {
141 f, err := fs.OpenFile(name, os.O_RDONLY, 0)
// inode is the common interface shared by filenode and dirnode; Read
// and Write carry a filenodePtr so the caller's position survives
// concurrent extent repacking.
149 type inode interface {
151 Read([]byte, filenodePtr) (int, filenodePtr, error)
152 Write([]byte, filenodePtr) (int, filenodePtr, error)
153 Truncate(int64) error
154 Readdir() []os.FileInfo
162 // filenode implements inode.
163 type filenode struct {
167 repacked int64 // number of times anything in []extents has changed len
168 memsize int64 // bytes in memExtents
172 // filenodePtr is an offset into a file that is (usually) efficient to
173 // seek to. Specifically, if filenode.repacked==filenodePtr.repacked
174 // then filenode.extents[filenodePtr.extentIdx][filenodePtr.extentOff]
175 // corresponds to file offset filenodePtr.off. Otherwise, it is
176 // necessary to reexamine len(filenode.extents[0]) etc. to find the
177 // correct extent and offset.
178 type filenodePtr struct {
185 // seek returns a ptr that is consistent with both startPtr.off and
186 // the current state of fn. The caller must already hold fn.RLock() or
189 // If startPtr points beyond the end of the file, ptr will point to
190 // exactly the end of the file.
194 // ptr.extentIdx == len(filenode.extents) // i.e., at EOF
196 // filenode.extents[ptr.extentIdx].Len() >= ptr.extentOff
197 func (fn *filenode) seek(startPtr filenodePtr) (ptr filenodePtr) {
200 // meaningless anyway
// Clamp to EOF: past-the-end offsets resolve to the one-past-last
// extent index.
202 } else if ptr.off >= fn.fileinfo.size {
203 ptr.extentIdx = len(fn.extents)
205 ptr.repacked = fn.repacked
// Fast path: no repack since ptr was computed, so extentIdx/extentOff
// are still valid (modulo falling off the end of an extent).
207 } else if ptr.repacked == fn.repacked {
208 // extentIdx and extentOff accurately reflect ptr.off,
209 // but might have fallen off the end of an extent
210 if ptr.extentOff >= fn.extents[ptr.extentIdx].Len() {
217 ptr.repacked = fn.repacked
219 if ptr.off >= fn.fileinfo.size {
220 ptr.extentIdx, ptr.extentOff = len(fn.extents), 0
// Slow path: linear scan from extent 0 to find the extent containing
// ptr.off. TODO candidate for binary search if extents lists get long.
223 // Recompute extentIdx and extentOff. We have already
224 // established fn.fileinfo.size > ptr.off >= 0, so we don't
225 // have to deal with edge cases here.
227 for ptr.extentIdx, ptr.extentOff = 0, 0; off < ptr.off; ptr.extentIdx++ {
228 // This would panic (index out of range) if
229 // fn.fileinfo.size were larger than
230 // sum(fn.extents[i].Len()) -- but that can't happen
231 // because we have ensured fn.fileinfo.size is always
233 extLen := int64(fn.extents[ptr.extentIdx].Len())
234 if off+extLen > ptr.off {
235 ptr.extentOff = int(ptr.off - off)
// appendExtent adds e to the end of the file and grows the recorded
// size accordingly. Caller presumably must hold fn's lock — confirm at
// call sites (not visible here).
243 func (fn *filenode) appendExtent(e extent) {
246 fn.extents = append(fn.extents, e)
247 fn.fileinfo.size += int64(e.Len())
250 func (fn *filenode) Parent() inode {
// Readdir on a regular file: see body upstream (elided here).
254 func (fn *filenode) Readdir() []os.FileInfo {
// Read copies bytes at startPtr into p and returns an updated ptr;
// reads stop at extent boundaries, with io.EOF suppressed when more
// extents remain.
258 func (fn *filenode) Read(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
259 ptr = fn.seek(startPtr)
261 err = ErrNegativeOffset
264 if ptr.extentIdx >= len(fn.extents) {
268 n, err = fn.extents[ptr.extentIdx].ReadAt(p, int64(ptr.extentOff))
272 if ptr.extentOff == fn.extents[ptr.extentIdx].Len() {
275 if ptr.extentIdx < len(fn.extents) && err == io.EOF {
283 func (fn *filenode) Size() int64 {
286 return fn.fileinfo.Size()
289 func (fn *filenode) Stat() os.FileInfo {
// Truncate is the exported, locking wrapper around truncate().
295 func (fn *filenode) Truncate(size int64) error {
298 return fn.truncate(size)
// truncate resizes the file to size, shrinking by dropping/slicing
// trailing extents or growing by zero-filling writable extents.
// Caller must hold fn's lock (the exported Truncate wrapper does).
301 func (fn *filenode) truncate(size int64) error {
302 if size == fn.fileinfo.size {
// Shrink path: drop whole extents past the cut point, accounting
// memsize for any in-memory extents that are discarded or truncated.
306 if size < fn.fileinfo.size {
307 ptr := fn.seek(filenodePtr{off: size})
308 for i := ptr.extentIdx; i < len(fn.extents); i++ {
309 if ext, ok := fn.extents[i].(*memExtent); ok {
310 fn.memsize -= int64(ext.Len())
313 if ptr.extentOff == 0 {
314 fn.extents = fn.extents[:ptr.extentIdx]
316 fn.extents = fn.extents[:ptr.extentIdx+1]
317 switch ext := fn.extents[ptr.extentIdx].(type) {
319 ext.Truncate(ptr.extentOff)
320 fn.memsize += int64(ext.Len())
322 fn.extents[ptr.extentIdx] = ext.Slice(0, ptr.extentOff)
325 fn.fileinfo.size = size
// Grow path: extend (or append) writable memExtents, capped at
// maxBlockSize each, until the target size is reached.
328 for size > fn.fileinfo.size {
329 grow := size - fn.fileinfo.size
332 if len(fn.extents) == 0 {
334 fn.extents = append(fn.extents, e)
335 } else if e, ok = fn.extents[len(fn.extents)-1].(writableExtent); !ok || e.Len() >= maxBlockSize {
337 fn.extents = append(fn.extents, e)
339 if maxgrow := int64(maxBlockSize - e.Len()); maxgrow < grow {
342 e.Truncate(e.Len() + int(grow))
343 fn.fileinfo.size += grow
349 // Caller must hold lock.
// Write copies p into the file at startPtr, restructuring fn.extents as
// needed so each chunk ("cando") lands in a writable extent. Implicitly
// extends the file (zero-fill via truncate) when writing past EOF.
350 func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePtr, err error) {
351 if startPtr.off > fn.fileinfo.size {
352 if err = fn.truncate(startPtr.off); err != nil {
353 return 0, startPtr, err
356 ptr = fn.seek(startPtr)
358 err = ErrNegativeOffset
// Each loop iteration writes at most one extent's worth (<= maxBlockSize).
361 for len(p) > 0 && err == nil {
363 if len(cando) > maxBlockSize {
364 cando = cando[:maxBlockSize]
366 // Rearrange/grow fn.extents (and shrink cando if
367 // needed) such that cando can be copied to
368 // fn.extents[ptr.extentIdx] at offset ptr.extentOff.
370 prev := ptr.extentIdx - 1
372 if cur < len(fn.extents) {
373 _, curWritable = fn.extents[cur].(writableExtent)
375 var prevAppendable bool
376 if prev >= 0 && fn.extents[prev].Len() < maxBlockSize {
377 _, prevAppendable = fn.extents[prev].(writableExtent)
// Case 1: overwriting the middle of a read-only (stored) extent —
// split it and splice in a fresh writable extent.
379 if ptr.extentOff > 0 && !curWritable {
380 // Split a non-writable block.
381 if max := fn.extents[cur].Len() - ptr.extentOff; max <= len(cando) {
382 // Truncate cur, and insert a new
385 fn.extents = append(fn.extents, nil)
386 copy(fn.extents[cur+1:], fn.extents[cur:])
388 // Split cur into two copies, truncate
389 // the one on the left, shift the one
390 // on the right, and insert a new
391 // extent between them.
392 fn.extents = append(fn.extents, nil, nil)
393 copy(fn.extents[cur+2:], fn.extents[cur:])
394 fn.extents[cur+2] = fn.extents[cur+2].Slice(ptr.extentOff+len(cando), -1)
399 e.Truncate(len(cando))
400 fn.memsize += int64(len(cando))
402 fn.extents[prev] = fn.extents[prev].Slice(0, ptr.extentOff)
// Case 2: current extent is already writable — write in place.
407 } else if curWritable {
408 if fit := int(fn.extents[cur].Len()) - ptr.extentOff; fit < len(cando) {
// Case 3: at an extent boundary with an appendable previous extent —
// grow prev instead of inserting a new extent.
413 // Shrink cando if needed to fit in prev extent.
414 if cangrow := maxBlockSize - fn.extents[prev].Len(); cangrow < len(cando) {
415 cando = cando[:cangrow]
419 if cur == len(fn.extents) {
420 // ptr is at EOF, filesize is changing.
421 fn.fileinfo.size += int64(len(cando))
422 } else if el := fn.extents[cur].Len(); el <= len(cando) {
423 // cando is long enough that we won't
424 // need cur any more. shrink cando to
425 // be exactly as long as cur
426 // (otherwise we'd accidentally shift
427 // the effective position of all
428 // extents after cur).
430 copy(fn.extents[cur:], fn.extents[cur+1:])
431 fn.extents = fn.extents[:len(fn.extents)-1]
433 // shrink cur by the same #bytes we're growing prev
434 fn.extents[cur] = fn.extents[cur].Slice(len(cando), -1)
440 ptr.extentOff = fn.extents[prev].Len()
441 fn.extents[prev].(writableExtent).Truncate(ptr.extentOff + len(cando))
442 fn.memsize += int64(len(cando))
// Case 4: no writable target — insert a brand-new memExtent at cur.
446 // Insert an extent between prev and cur, and advance prev/cur.
447 fn.extents = append(fn.extents, nil)
448 if cur < len(fn.extents) {
449 copy(fn.extents[cur+1:], fn.extents[cur:])
453 // appending a new extent does
454 // not invalidate any ptrs
457 e.Truncate(len(cando))
458 fn.memsize += int64(len(cando))
465 // Finally we can copy bytes from cando to the current extent.
466 fn.extents[ptr.extentIdx].(writableExtent).WriteAt(cando, ptr.extentOff)
470 ptr.off += int64(len(cando))
471 ptr.extentOff += len(cando)
472 if ptr.extentOff >= maxBlockSize {
475 if fn.extents[ptr.extentIdx].Len() == ptr.extentOff {
483 // Write some data out to disk to reduce memory use. Caller must have
// Full-size in-memory extents are flushed to Keep via PutB and replaced
// with storedExtent references; smaller extents are left for dirnode.sync.
485 func (fn *filenode) pruneMemExtents() {
486 // TODO: async (don't hold Lock() while waiting for Keep)
487 // TODO: share code with (*dirnode)sync()
488 // TODO: pack/flush small blocks too, when fragmented
489 for idx, ext := range fn.extents {
490 ext, ok := ext.(*memExtent)
491 if !ok || ext.Len() < maxBlockSize {
494 locator, _, err := fn.parent.kc.PutB(ext.buf)
496 // TODO: stall (or return errors from)
497 // subsequent writes until flushing
// On success, swap the memExtent for a storedExtent and release
// its accounted memory.
501 fn.memsize -= int64(ext.Len())
502 fn.extents[idx] = storedExtent{
512 // FileSystem returns a CollectionFileSystem for the collection.
// The root dirnode is its own parent (so ".." at the root resolves to
// the root), and the tree is populated by parsing c.ManifestText.
513 func (c *Collection) FileSystem(client *Client, kc keepClient) (CollectionFileSystem, error) {
514 fs := &fileSystem{dirnode: dirnode{
517 fileinfo: fileinfo{name: ".", mode: os.ModeDir | 0755},
519 inodes: make(map[string]inode),
521 fs.dirnode.parent = &fs.dirnode
522 if err := fs.dirnode.loadManifest(c.ManifestText); err != nil {
// unreaddirs buffers the remaining entries for incremental Readdir(n).
534 unreaddirs []os.FileInfo
// Read fails with ErrWriteOnlyMode if the file was opened O_WRONLY;
// otherwise it delegates to the inode under its read lock.
537 func (f *file) Read(p []byte) (n int, err error) {
539 return 0, ErrWriteOnlyMode
542 defer f.inode.RUnlock()
543 n, f.ptr, err = f.inode.Read(p, f.ptr)
// Seek implements io.Seeker; negative targets yield ErrNegativeOffset.
547 func (f *file) Seek(off int64, whence int) (pos int64, err error) {
548 size := f.inode.Size()
559 return f.ptr.off, ErrNegativeOffset
561 if ptr.off != f.ptr.off {
563 // force filenode to recompute f.ptr fields on next
567 return f.ptr.off, nil
570 func (f *file) Truncate(size int64) error {
571 return f.inode.Truncate(size)
// Write rejects read-only handles; in append mode it first moves the
// pointer to EOF using the filenode's current size/extent state.
574 func (f *file) Write(p []byte) (n int, err error) {
576 return 0, ErrReadOnlyFile
579 defer f.inode.Unlock()
580 if fn, ok := f.inode.(*filenode); ok && f.append {
582 off: fn.fileinfo.size,
583 extentIdx: len(fn.extents),
585 repacked: fn.repacked,
588 n, f.ptr, err = f.inode.Write(p, f.ptr)
// Readdir implements http.File: count<=0 presumably returns everything
// at once; positive counts page through the buffered listing.
592 func (f *file) Readdir(count int) ([]os.FileInfo, error) {
593 if !f.inode.Stat().IsDir() {
594 return nil, ErrInvalidOperation
597 return f.inode.Readdir(), nil
599 if f.unreaddirs == nil {
600 f.unreaddirs = f.inode.Readdir()
602 if len(f.unreaddirs) == 0 {
605 if count > len(f.unreaddirs) {
606 count = len(f.unreaddirs)
608 ret := f.unreaddirs[:count]
609 f.unreaddirs = f.unreaddirs[count:]
613 func (f *file) Stat() (os.FileInfo, error) {
614 return f.inode.Stat(), nil
617 func (f *file) Close() error {
// dirnode is a directory inode: a named map of child inodes plus a
// reference to the Keep client used when flushing file data.
622 type dirnode struct {
627 inodes map[string]inode
631 // sync flushes in-memory data (for all files in the tree rooted at
632 // dn) to persistent storage. Caller must hold dn.Lock().
633 func (dn *dirnode) sync() error {
634 type shortBlock struct {
638 var pending []shortBlock
// flush concatenates the given short memExtents into one Keep block,
// writes it with PutB, then rewrites each extent as a storedExtent
// pointing at its slice of the combined block.
641 flush := func(sbs []shortBlock) error {
645 block := make([]byte, 0, maxBlockSize)
646 for _, sb := range sbs {
647 block = append(block, sb.fn.extents[sb.idx].(*memExtent).buf...)
649 locator, _, err := dn.kc.PutB(block)
654 for _, sb := range sbs {
655 data := sb.fn.extents[sb.idx].(*memExtent).buf
656 sb.fn.extents[sb.idx] = storedExtent{
664 sb.fn.memsize -= int64(len(data))
// Iterate children in sorted name order for deterministic output.
669 names := make([]string, 0, len(dn.inodes))
670 for name := range dn.inodes {
671 names = append(names, name)
675 for _, name := range names {
676 fn, ok := dn.inodes[name].(*filenode)
682 for idx, ext := range fn.extents {
683 ext, ok := ext.(*memExtent)
// Large extents flush alone; small ones are batched until the
// pending batch would exceed maxBlockSize.
687 if ext.Len() > maxBlockSize/2 {
688 if err := flush([]shortBlock{{fn, idx}}); err != nil {
693 if pendingLen+ext.Len() > maxBlockSize {
694 if err := flush(pending); err != nil {
700 pending = append(pending, shortBlock{fn, idx})
701 pendingLen += ext.Len()
704 return flush(pending)
// MarshalManifest is the exported, locking wrapper around marshalManifest.
707 func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
710 return dn.marshalManifest(prefix)
713 // caller must have read lock.
// marshalManifest syncs buffered data, then renders one manifest stream
// for this directory (prefix as the stream name) followed by the
// streams of all subdirectories, recursively.
714 func (dn *dirnode) marshalManifest(prefix string) (string, error) {
716 type m1segment struct {
721 var segments []m1segment
725 if err := dn.sync(); err != nil {
// Sorted iteration keeps the manifest deterministic.
729 names := make([]string, 0, len(dn.inodes))
730 for name, node := range dn.inodes {
731 names = append(names, name)
737 for _, name := range names {
738 node := dn.inodes[name]
739 switch node := node.(type) {
741 subdir, err := node.marshalManifest(prefix + "/" + name)
745 subdirs = subdirs + subdir
747 if len(node.extents) == 0 {
748 segments = append(segments, m1segment{name: name})
751 for _, e := range node.extents {
752 switch e := e.(type) {
// Reuse the previous block token when consecutive extents share a
// locator, rather than repeating it in the stream.
754 if len(blocks) > 0 && blocks[len(blocks)-1] == e.locator {
755 streamLen -= int64(e.size)
757 blocks = append(blocks, e.locator)
761 offset: streamLen + int64(e.offset),
762 length: int64(e.length),
// Coalesce adjacent segments of the same file into one file token.
764 if prev := len(segments) - 1; prev >= 0 &&
765 segments[prev].name == name &&
766 segments[prev].offset+segments[prev].length == next.offset {
767 segments[prev].length += next.length
769 segments = append(segments, next)
771 streamLen += int64(e.size)
773 // This can't happen: we
774 // haven't unlocked since
776 panic(fmt.Sprintf("can't marshal extent type %T", e))
780 panic(fmt.Sprintf("can't marshal inode type %T", node))
783 var filetokens []string
784 for _, s := range segments {
785 filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
787 if len(filetokens) == 0 {
// A stream must carry at least one locator; use the well-known MD5 of
// the empty string for streams containing only empty files.
789 } else if len(blocks) == 0 {
790 blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
792 return manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
// loadManifest parses manifest text into the directory tree rooted at
// dn. Each line is one stream: a dirname token, then block locators,
// then pos:length:filename segment tokens. Returns a line-numbered
// error on any malformed input.
795 func (dn *dirnode) loadManifest(txt string) error {
798 streams := strings.Split(txt, "\n")
799 if streams[len(streams)-1] != "" {
800 return fmt.Errorf("line %d: no trailing newline", len(streams))
802 for i, stream := range streams[:len(streams)-1] {
804 var extents []storedExtent
805 var anyFileTokens bool
808 for i, token := range strings.Split(stream, " ") {
// First token is the (escaped) stream/directory name.
810 dirname = manifestUnescape(token)
// Locator tokens contain "+" but no ":"; they must all precede the
// first file-segment token.
813 if !strings.Contains(token, ":") {
815 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
817 toks := strings.SplitN(token, "+", 3)
819 return fmt.Errorf("line %d: bad locator %q", lineno, token)
821 length, err := strconv.ParseInt(toks[1], 10, 32)
822 if err != nil || length < 0 {
823 return fmt.Errorf("line %d: bad locator %q", lineno, token)
825 extents = append(extents, storedExtent{
832 } else if len(extents) == 0 {
833 return fmt.Errorf("line %d: bad locator %q", lineno, token)
// File-segment token: offset:length:escaped-name.
836 toks := strings.Split(token, ":")
838 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
842 offset, err := strconv.ParseInt(toks[0], 10, 64)
843 if err != nil || offset < 0 {
844 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
846 length, err := strconv.ParseInt(toks[1], 10, 64)
847 if err != nil || length < 0 {
848 return fmt.Errorf("line %d: bad file segment %q", lineno, token)
850 name := path.Clean(dirname + "/" + manifestUnescape(toks[2]))
851 fnode, err := dn.createFileAndParents(name)
853 return fmt.Errorf("line %d: cannot use path %q: %s", lineno, name, err)
855 // Map the stream offset/range coordinates to
856 // block/offset/range coordinates and add
857 // corresponding storedExtents to the filenode
859 // Can't continue where we left off.
860 // TODO: binary search instead of
861 // rewinding all the way (but this
862 // situation might be rare anyway)
865 for next := int64(0); extIdx < len(extents); extIdx, pos = extIdx+1, next {
867 next = pos + int64(e.Len())
868 if next <= offset || e.Len() == 0 {
872 if pos >= offset+length {
877 blkOff = int(offset - pos)
879 blkLen := e.Len() - blkOff
880 if pos+int64(blkOff+blkLen) > offset+length {
881 blkLen = int(offset + length - pos - int64(blkOff))
883 fnode.appendExtent(storedExtent{
890 if next > offset+length {
// Segment extends past the end of the stream's blocks: reject.
894 if extIdx == len(extents) && pos < offset+length {
895 return fmt.Errorf("line %d: invalid segment in %d-byte stream: %q", lineno, pos, token)
// A valid stream needs at least one file segment, one locator, and a
// stream name.
899 return fmt.Errorf("line %d: no file segments", lineno)
900 } else if len(extents) == 0 {
901 return fmt.Errorf("line %d: no locators", lineno)
902 } else if dirname == "" {
903 return fmt.Errorf("line %d: no stream name", lineno)
909 // only safe to call from loadManifest -- no locking
// createFileAndParents walks/creates each path component as a dirnode,
// then creates (or reuses) the final component as a filenode. Errors on
// empty/"."/".." basenames and on type conflicts with existing inodes.
910 func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
911 names := strings.Split(path, "/")
912 if basename := names[len(names)-1]; basename == "" || basename == "." || basename == ".." {
913 err = fmt.Errorf("invalid filename")
917 for i, name := range names {
918 dn, ok := node.(*dirnode)
923 if name == "" || name == "." {
930 node, ok = dn.inodes[name]
932 if i == len(names)-1 {
933 fn = dn.newFilenode(name, 0755)
936 node = dn.newDirnode(name, 0755)
// An existing inode at the leaf must already be a filenode.
940 if fn, ok = node.(*filenode); !ok {
941 err = ErrInvalidArgument
// mkdir creates a directory via OpenFile with O_EXCL so an existing
// entry is an error.
946 func (dn *dirnode) mkdir(name string) (*file, error) {
947 return dn.OpenFile(name, os.O_CREATE|os.O_EXCL, os.ModeDir|0755)
950 func (dn *dirnode) Mkdir(name string, perm os.FileMode) error {
951 f, err := dn.mkdir(name)
958 func (dn *dirnode) Remove(name string) error {
959 return dn.remove(name, false)
962 func (dn *dirnode) RemoveAll(name string) error {
963 return dn.remove(name, true)
// remove deletes the named entry; non-recursive removal of a non-empty
// directory fails with ErrDirectoryNotEmpty.
966 func (dn *dirnode) remove(name string, recursive bool) error {
967 dirname, name := path.Split(name)
968 if name == "" || name == "." || name == ".." {
969 return ErrInvalidArgument
971 dn, ok := dn.lookupPath(dirname).(*dirnode)
973 return os.ErrNotExist
977 switch node := dn.inodes[name].(type) {
979 return os.ErrNotExist
983 if !recursive && len(node.inodes) > 0 {
984 return ErrDirectoryNotEmpty
987 delete(dn.inodes, name)
// Rename moves oldname to newname, with os.Rename-like semantics:
// overwriting a file is allowed; overwriting a directory requires the
// source to be a directory and the target to be empty.
991 func (dn *dirnode) Rename(oldname, newname string) error {
992 olddir, oldname := path.Split(oldname)
993 if oldname == "" || oldname == "." || oldname == ".." {
994 return ErrInvalidArgument
996 olddirf, err := dn.OpenFile(olddir+".", os.O_RDONLY, 0)
998 return fmt.Errorf("%q: %s", olddir, err)
1000 defer olddirf.Close()
1001 newdir, newname := path.Split(newname)
1002 if newname == "." || newname == ".." {
1003 return ErrInvalidArgument
1004 } else if newname == "" {
1005 // Rename("a/b", "c/") means Rename("a/b", "c/b")
1008 newdirf, err := dn.OpenFile(newdir+".", os.O_RDONLY, 0)
1010 return fmt.Errorf("%q: %s", newdir, err)
1012 defer newdirf.Close()
1014 // When acquiring locks on multiple nodes, all common
1015 // ancestors must be locked first in order to avoid
1016 // deadlock. This is assured by locking the path from root to
1017 // newdir, then locking the path from root to olddir, skipping
1018 // any already-locked nodes.
1019 needLock := []sync.Locker{}
1020 for _, f := range []*file{olddirf, newdirf} {
1022 needLock = append(needLock, node)
// Walk up to the root (the root is its own parent, terminating
// the loop).
1023 for node.Parent() != node {
1024 node = node.Parent()
1025 needLock = append(needLock, node)
1028 locked := map[sync.Locker]bool{}
1029 for i := len(needLock) - 1; i >= 0; i-- {
1030 if n := needLock[i]; !locked[n] {
1037 olddn := olddirf.inode.(*dirnode)
1038 newdn := newdirf.inode.(*dirnode)
1039 oldinode, ok := olddn.inodes[oldname]
1041 return os.ErrNotExist
1043 if existing, ok := newdn.inodes[newname]; ok {
1044 // overwriting an existing file or dir
1045 if dn, ok := existing.(*dirnode); ok {
1046 if !oldinode.Stat().IsDir() {
1047 return ErrIsDirectory
1051 if len(dn.inodes) > 0 {
1052 return ErrDirectoryNotEmpty
// fileinfo.size on a dirnode apparently tracks the child count here —
// confirm against the dirnode definition upstream.
1056 newdn.fileinfo.size++
1058 newdn.inodes[newname] = oldinode
1059 delete(olddn.inodes, oldname)
1060 olddn.fileinfo.size--
1064 func (dn *dirnode) Parent() inode {
// Readdir returns a FileInfo per child; map iteration order is random,
// so callers needing determinism must sort (as marshalManifest does).
1070 func (dn *dirnode) Readdir() (fi []os.FileInfo) {
1073 fi = make([]os.FileInfo, 0, len(dn.inodes))
1074 for _, inode := range dn.inodes {
1075 fi = append(fi, inode.Stat())
// Read/Write/Truncate are invalid on a directory inode.
1080 func (dn *dirnode) Read(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
1081 return 0, ptr, ErrInvalidOperation
1084 func (dn *dirnode) Write(p []byte, ptr filenodePtr) (int, filenodePtr, error) {
1085 return 0, ptr, ErrInvalidOperation
1088 func (dn *dirnode) Size() int64 {
1091 return dn.fileinfo.Size()
1094 func (dn *dirnode) Stat() os.FileInfo {
1100 func (dn *dirnode) Truncate(int64) error {
1101 return ErrInvalidOperation
1104 // lookupPath returns the inode for the file/directory with the given
1105 // name (which may contain "/" separators), along with its parent
1106 // node. If no such file/directory exists, the returned node is nil.
1107 func (dn *dirnode) lookupPath(path string) (node inode) {
1109 for _, name := range strings.Split(path, "/") {
1110 dn, ok := node.(*dirnode)
// "." and empty components are no-ops; ".." moves to the parent
// (the root is its own parent, so ".." at the root stays put).
1114 if name == "." || name == "" {
1118 node = node.Parent()
1122 node = dn.inodes[name]
// newDirnode creates a child directory inode, lazily allocating the
// parent's inode map. Caller presumably holds dn's lock (or is in
// single-threaded manifest loading) — confirm at call sites.
1128 func (dn *dirnode) newDirnode(name string, perm os.FileMode) *dirnode {
1135 mode: os.ModeDir | perm,
1138 if dn.inodes == nil {
1139 dn.inodes = make(map[string]inode)
1141 dn.inodes[name] = child
// newFilenode: same as newDirnode, but for a regular file.
1146 func (dn *dirnode) newFilenode(name string, perm os.FileMode) *filenode {
1154 if dn.inodes == nil {
1155 dn.inodes = make(map[string]inode)
1157 dn.inodes[name] = child
1162 // OpenFile is analogous to os.OpenFile().
1163 func (dn *dirnode) OpenFile(name string, flag int, perm os.FileMode) (*file, error) {
1164 if flag&os.O_SYNC != 0 {
1165 return nil, ErrSyncNotSupported
1167 dirname, name := path.Split(name)
1168 dn, ok := dn.lookupPath(dirname).(*dirnode)
1170 return nil, os.ErrNotExist
1172 var readable, writable bool
1173 switch flag & (os.O_RDWR | os.O_RDONLY | os.O_WRONLY) {
1182 return nil, fmt.Errorf("invalid flags 0x%x", flag)
1185 // A directory can be opened via "foo/", "foo/.", or
1189 return &file{inode: dn}, nil
1191 return &file{inode: dn.Parent()}, nil
1194 createMode := flag&os.O_CREATE != 0
1202 n, ok := dn.inodes[name]
1205 return nil, os.ErrNotExist
// perm&os.ModeDir selects directory-vs-file creation, per the
// CollectionFileSystem interface contract.
1208 n = dn.newDirnode(name, 0755)
1210 n = dn.newFilenode(name, 0755)
1212 } else if flag&os.O_EXCL != 0 {
1213 return nil, ErrFileExists
1214 } else if flag&os.O_TRUNC != 0 {
1216 return nil, fmt.Errorf("invalid flag O_TRUNC in read-only mode")
1217 } else if fn, ok := n.(*filenode); !ok {
1218 return nil, fmt.Errorf("invalid flag O_TRUNC when opening directory")
1225 append: flag&os.O_APPEND != 0,
// extent is one contiguous piece of a file's content: either an
// in-memory buffer (memExtent) or a reference into a stored Keep
// block (storedExtent).
1231 type extent interface {
1234 // Return a new extent with a subsection of the data from this
1235 // one. length<0 means length=Len()-off.
1236 Slice(off int, length int) extent
// writableExtent adds in-place mutation; only memExtent implements it.
1239 type writableExtent interface {
1241 WriteAt(p []byte, off int)
1245 type memExtent struct {
1249 func (me *memExtent) Len() int {
// Slice copies the requested range into a fresh buffer, so the new
// extent does not alias me.buf.
1253 func (me *memExtent) Slice(off, length int) extent {
1255 length = len(me.buf) - off
1257 buf := make([]byte, length)
1258 copy(buf, me.buf[off:])
1259 return &memExtent{buf: buf}
// Truncate resizes the buffer, growing capacity geometrically when
// needed and zeroing the abandoned tail when shrinking.
1262 func (me *memExtent) Truncate(n int) {
1263 if n > cap(me.buf) {
1266 newsize = newsize << 2
1268 newbuf := make([]byte, n, newsize)
1269 copy(newbuf, me.buf)
1272 // Zero unused part when shrinking, in case we grow
1273 // and start using it again later.
1274 for i := n; i < len(me.buf); i++ {
// WriteAt requires the target range to already be within the buffer;
// callers must Truncate first.
1281 func (me *memExtent) WriteAt(p []byte, off int) {
1282 if off+len(p) > len(me.buf) {
1283 panic("overflowed extent")
1285 copy(me.buf[off:], p)
1288 func (me *memExtent) ReadAt(p []byte, off int64) (n int, err error) {
1289 if off > int64(me.Len()) {
1293 n = copy(p, me.buf[int(off):])
// storedExtent refers to a byte range (offset/length) within a block
// already stored in Keep, identified by its locator.
1300 type storedExtent struct {
1308 func (se storedExtent) Len() int {
// Slice narrows the view without copying data — it only adjusts
// offset/length into the same stored block.
1312 func (se storedExtent) Slice(n, size int) extent {
1315 if size >= 0 && se.length > size {
// ReadAt serves p from the underlying Keep block, clamping the read to
// this extent's window before delegating to kc.ReadAt.
1321 func (se storedExtent) ReadAt(p []byte, off int64) (n int, err error) {
1322 if off > int64(se.length) {
1325 maxlen := se.length - int(off)
1326 if len(p) > maxlen {
1328 n, err = se.kc.ReadAt(se.locator, p, int(off)+se.offset)
1334 return se.kc.ReadAt(se.locator, p, int(off)+se.offset)
// canonicalName normalizes a path to the rooted, cleaned form used as
// a map key / stream name.
1337 func canonicalName(name string) string {
1338 name = path.Clean("/" + name)
1339 if name == "/" || name == "./" {
1341 } else if strings.HasPrefix(name, "/") {
// Manifest names escape non-word characters as \NNN octal sequences;
// these helpers convert in both directions.
1347 var manifestEscapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
1349 func manifestUnescapeFunc(seq string) string {
1353 i, err := strconv.ParseUint(seq[1:], 8, 8)
1355 // Invalid escape sequence: can't unescape.
1358 return string([]byte{byte(i)})
1361 func manifestUnescape(s string) string {
1362 return manifestEscapeSeq.ReplaceAllStringFunc(s, manifestUnescapeFunc)
1365 var manifestEscapedChar = regexp.MustCompile(`[^\.\w/]`)
1367 func manifestEscapeFunc(seq string) string {
1368 return fmt.Sprintf("\\%03o", byte(seq[0]))
1371 func manifestEscape(s string) string {
1372 return manifestEscapedChar.ReplaceAllStringFunc(s, manifestEscapeFunc)