package arvados
import (
- "crypto/md5"
"errors"
"fmt"
"io"
// keepClient is the minimal subset of the Keep client API needed by
// CollectionFileSystem: random-access reads from stored blocks, and
// writing whole blocks.
type keepClient interface {
	// ReadAt fills p with data from the block named by locator,
	// starting at byte offset off within that block.
	ReadAt(locator string, p []byte, off int) (int, error)
	// PutB stores p as a single block and returns the new block's
	// locator. The int result is presumably the replica count —
	// confirm against the concrete client implementation.
	PutB(p []byte) (string, int, error)
}
type fileinfo struct {
return nil
}
-func (fi fileinfo) Stat() os.FileInfo {
- return fi
-}
-
// A CollectionFileSystem is an http.Filesystem plus Stat() and
// support for opening writable files.
type CollectionFileSystem interface {
}
type inode interface {
- os.FileInfo
Parent() inode
Read([]byte, filenodePtr) (int, filenodePtr, error)
Write([]byte, filenodePtr) (int, filenodePtr, error)
Truncate(int64) error
Readdir() []os.FileInfo
+ Size() int64
Stat() os.FileInfo
sync.Locker
RLock()
// filenode implements inode.
type filenode struct {
	fileinfo fileinfo // name, size, mode for this file
	parent   *dirnode // directory containing this file
	extents  []extent // file content, as a sequence of in-memory and stored extents
	repacked int64 // number of times anything in []extents has changed len
	memsize  int64 // bytes in memExtents
	sync.RWMutex
}
return
}
+func (fn *filenode) Size() int64 {
+ fn.RLock()
+ defer fn.RUnlock()
+ return fn.fileinfo.Size()
+}
+
+func (fn *filenode) Stat() os.FileInfo {
+ fn.RLock()
+ defer fn.RUnlock()
+ return fn.fileinfo
+}
+
func (fn *filenode) Truncate(size int64) error {
fn.Lock()
defer fn.Unlock()
if size < fn.fileinfo.size {
ptr := fn.seek(filenodePtr{off: size, repacked: fn.repacked - 1})
+ for i := ptr.extentIdx; i < len(fn.extents); i++ {
+ if ext, ok := fn.extents[i].(*memExtent); ok {
+ fn.memsize -= int64(ext.Len())
+ }
+ }
if ptr.extentOff == 0 {
fn.extents = fn.extents[:ptr.extentIdx]
} else {
fn.extents = fn.extents[:ptr.extentIdx+1]
- e := fn.extents[ptr.extentIdx]
- if e, ok := e.(writableExtent); ok {
- e.Truncate(ptr.extentOff)
- } else {
- fn.extents[ptr.extentIdx] = e.Slice(0, ptr.extentOff)
+ switch ext := fn.extents[ptr.extentIdx].(type) {
+ case *memExtent:
+ ext.Truncate(ptr.extentOff)
+ fn.memsize += int64(ext.Len())
+ default:
+ fn.extents[ptr.extentIdx] = ext.Slice(0, ptr.extentOff)
}
}
fn.fileinfo.size = size
}
e.Truncate(e.Len() + int(grow))
fn.fileinfo.size += grow
+ fn.memsize += grow
}
return nil
}
prev++
e := &memExtent{}
e.Truncate(len(cando))
+ fn.memsize += int64(len(cando))
fn.extents[cur] = e
fn.extents[prev] = fn.extents[prev].Slice(0, ptr.extentOff)
ptr.extentIdx++
ptr.extentIdx--
ptr.extentOff = fn.extents[prev].Len()
fn.extents[prev].(writableExtent).Truncate(ptr.extentOff + len(cando))
+ fn.memsize += int64(len(cando))
ptr.repacked++
fn.repacked++
} else {
}
e := &memExtent{}
e.Truncate(len(cando))
+ fn.memsize += int64(len(cando))
fn.extents[cur] = e
cur++
prev++
ptr.off += int64(len(cando))
ptr.extentOff += len(cando)
+ if ptr.extentOff >= maxBlockSize {
+ fn.pruneMemExtents()
+ }
if fn.extents[ptr.extentIdx].Len() == ptr.extentOff {
ptr.extentOff = 0
ptr.extentIdx++
return
}
+// Write some data out to disk to reduce memory use. Caller must have
+// write lock.
+func (fn *filenode) pruneMemExtents() {
+ // TODO: async (don't hold Lock() while waiting for Keep)
+ // TODO: share code with (*dirnode)sync()
+ // TODO: pack/flush small blocks too, when fragmented
+ for idx, ext := range fn.extents {
+ ext, ok := ext.(*memExtent)
+ if !ok || ext.Len() < maxBlockSize {
+ continue
+ }
+ locator, _, err := fn.parent.kc.PutB(ext.buf)
+ if err != nil {
+ // TODO: stall (or return errors from)
+ // subsequent writes until flushing
+ // starts to succeed
+ continue
+ }
+ fn.memsize -= int64(ext.Len())
+ fn.extents[idx] = storedExtent{
+ kc: fn.parent.kc,
+ locator: locator,
+ size: ext.Len(),
+ offset: 0,
+ length: ext.Len(),
+ }
+ }
+}
+
// FileSystem returns a CollectionFileSystem for the collection.
-func (c *Collection) FileSystem(client *Client, kc keepClient) CollectionFileSystem {
+func (c *Collection) FileSystem(client *Client, kc keepClient) (CollectionFileSystem, error) {
fs := &fileSystem{dirnode: dirnode{
- cache: &keepBlockCache{kc: kc},
client: client,
kc: kc,
fileinfo: fileinfo{name: ".", mode: os.ModeDir | 0755},
inodes: make(map[string]inode),
}}
fs.dirnode.parent = &fs.dirnode
- fs.dirnode.loadManifest(c.ManifestText)
- return fs
+ if err := fs.dirnode.loadManifest(c.ManifestText); err != nil {
+ return nil, err
+ }
+ return fs, nil
}
type file struct {
}
func (f *file) Readdir(count int) ([]os.FileInfo, error) {
- if !f.inode.IsDir() {
+ if !f.inode.Stat().IsDir() {
return nil, ErrInvalidOperation
}
if count <= 0 {
}
func (f *file) Stat() (os.FileInfo, error) {
- return f.inode, nil
+ return f.inode.Stat(), nil
}
func (f *file) Close() error {
}
// dirnode implements inode for a directory: it maps entry names to
// child inodes and carries the API/Keep clients used when flushing
// descendants' data.
type dirnode struct {
	fileinfo fileinfo         // name, mode, etc. for this directory
	parent   *dirnode         // parent directory; the root points to itself
	client   *Client          // arvados API client
	kc       keepClient       // keep client used for block reads/writes
	inodes   map[string]inode // directory entries, keyed by name
	sync.RWMutex
}
-// caller must hold dn.Lock().
+// sync flushes in-memory data (for all files in the tree rooted at
+// dn) to persistent storage. Caller must hold dn.Lock().
func (dn *dirnode) sync() error {
type shortBlock struct {
fn *filenode
if len(sbs) == 0 {
return nil
}
- hash := md5.New()
- size := 0
+ block := make([]byte, 0, maxBlockSize)
for _, sb := range sbs {
- data := sb.fn.extents[sb.idx].(*memExtent).buf
- if _, err := hash.Write(data); err != nil {
- return err
- }
- size += len(data)
+ block = append(block, sb.fn.extents[sb.idx].(*memExtent).buf...)
+ }
+ locator, _, err := dn.kc.PutB(block)
+ if err != nil {
+ return err
}
- // FIXME: write to keep
- locator := fmt.Sprintf("%x+%d", hash.Sum(nil), size)
off := 0
for _, sb := range sbs {
data := sb.fn.extents[sb.idx].(*memExtent).buf
sb.fn.extents[sb.idx] = storedExtent{
- cache: dn.cache,
+ kc: dn.kc,
locator: locator,
- size: size,
+ size: len(block),
offset: off,
length: len(data),
}
off += len(data)
+ sb.fn.memsize -= int64(len(data))
}
return nil
}
// MarshalManifest flushes in-memory file data (via sync, inside
// marshalManifest) and returns the manifest text for the tree rooted
// at dn, using prefix as the stream name.
func (dn *dirnode) MarshalManifest(prefix string) (string, error) {
	dn.Lock()
	defer dn.Unlock()
	return dn.marshalManifest(prefix)
}
+// caller must have read lock.
+func (dn *dirnode) marshalManifest(prefix string) (string, error) {
var streamLen int64
type m1segment struct {
name string
var subdirs string
var blocks []string
+ if err := dn.sync(); err != nil {
+ return "", err
+ }
+
names := make([]string, 0, len(dn.inodes))
- for name := range dn.inodes {
+ for name, node := range dn.inodes {
names = append(names, name)
+ node.Lock()
+ defer node.Unlock()
}
sort.Strings(names)
node := dn.inodes[name]
switch node := node.(type) {
case *dirnode:
- subdir, err := node.MarshalManifest(prefix + "/" + node.Name())
+ subdir, err := node.marshalManifest(prefix + "/" + name)
if err != nil {
return "", err
}
case *filenode:
for _, e := range node.extents {
switch e := e.(type) {
- case *memExtent:
- blocks = append(blocks, fmt.Sprintf("FIXME+%d", e.Len()))
- segments = append(segments, m1segment{
- name: node.Name(),
- offset: streamLen,
- length: int64(e.Len()),
- })
- streamLen += int64(e.Len())
case storedExtent:
if len(blocks) > 0 && blocks[len(blocks)-1] == e.locator {
streamLen -= int64(e.size)
blocks = append(blocks, e.locator)
}
segments = append(segments, m1segment{
- name: node.Name(),
+ name: name,
offset: streamLen + int64(e.offset),
length: int64(e.length),
})
streamLen += int64(e.size)
default:
+ // This can't happen: we
+ // haven't unlocked since
+ // calling sync().
panic(fmt.Sprintf("can't marshal extent type %T", e))
}
}
}
var filetokens []string
for _, s := range segments {
- filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, s.name))
+ filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
}
if len(filetokens) == 0 {
return subdirs, nil
} else if len(blocks) == 0 {
blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
}
- return prefix + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
+ return manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
}
-func (dn *dirnode) loadManifest(txt string) {
+func (dn *dirnode) loadManifest(txt string) error {
// FIXME: faster
var dirname string
- for _, stream := range strings.Split(txt, "\n") {
+ streams := strings.Split(txt, "\n")
+ if streams[len(streams)-1] != "" {
+ return fmt.Errorf("line %d: no trailing newline", len(streams))
+ }
+ for i, stream := range streams[:len(streams)-1] {
+ lineno := i + 1
var extents []storedExtent
+ var anyFileTokens bool
for i, token := range strings.Split(stream, " ") {
if i == 0 {
dirname = manifestUnescape(token)
continue
}
if !strings.Contains(token, ":") {
+ if anyFileTokens {
+ return fmt.Errorf("line %d: bad file segment %q", lineno, token)
+ }
toks := strings.SplitN(token, "+", 3)
if len(toks) < 2 {
- // FIXME: broken
- continue
+ return fmt.Errorf("line %d: bad locator %q", lineno, token)
}
length, err := strconv.ParseInt(toks[1], 10, 32)
if err != nil || length < 0 {
- // FIXME: broken
- continue
+ return fmt.Errorf("line %d: bad locator %q", lineno, token)
}
extents = append(extents, storedExtent{
locator: token,
length: int(length),
})
continue
+ } else if len(extents) == 0 {
+ return fmt.Errorf("line %d: bad locator %q", lineno, token)
}
+
toks := strings.Split(token, ":")
if len(toks) != 3 {
- // FIXME: broken manifest
- continue
+ return fmt.Errorf("line %d: bad file segment %q", lineno, token)
}
+ anyFileTokens = true
+
offset, err := strconv.ParseInt(toks[0], 10, 64)
if err != nil || offset < 0 {
- // FIXME: broken manifest
- continue
+ return fmt.Errorf("line %d: bad file segment %q", lineno, token)
}
length, err := strconv.ParseInt(toks[1], 10, 64)
if err != nil || length < 0 {
- // FIXME: broken manifest
- continue
+ return fmt.Errorf("line %d: bad file segment %q", lineno, token)
}
name := path.Clean(dirname + "/" + manifestUnescape(toks[2]))
dn.makeParentDirs(name)
f, err := dn.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0700)
if err != nil {
- // FIXME: broken
- continue
+ return fmt.Errorf("line %d: cannot append to %q: %s", lineno, name, err)
}
if f.inode.Stat().IsDir() {
f.Close()
- // FIXME: broken manifest
- continue
+ return fmt.Errorf("line %d: cannot append to %q: is a directory", lineno, name)
}
// Map the stream offset/range coordinates to
// block/offset/range coordinates and add
blkLen = int(offset + length - pos - int64(blkOff))
}
f.inode.(*filenode).appendExtent(storedExtent{
- cache: dn.cache,
+ kc: dn.kc,
locator: e.locator,
size: e.size,
offset: blkOff,
pos = next
}
f.Close()
+ if pos < offset+length {
+ return fmt.Errorf("line %d: invalid segment in %d-byte stream: %q", lineno, pos, token)
+ }
+ }
+ if !anyFileTokens {
+ return fmt.Errorf("line %d: no file segments", lineno)
+ } else if len(extents) == 0 {
+ return fmt.Errorf("line %d: no locators", lineno)
+ } else if dirname == "" {
+ return fmt.Errorf("line %d: no stream name", lineno)
}
}
+ return nil
}
func (dn *dirnode) makeParentDirs(name string) (err error) {
return 0, ptr, ErrInvalidOperation
}
+func (dn *dirnode) Size() int64 {
+ dn.RLock()
+ defer dn.RUnlock()
+ return dn.fileinfo.Size()
+}
+
+func (dn *dirnode) Stat() os.FileInfo {
+ dn.RLock()
+ defer dn.RUnlock()
+ return dn.fileinfo
+}
+
// Truncate implements inode. Truncating a directory is not a valid
// operation, so it always fails with ErrInvalidOperation.
func (dn *dirnode) Truncate(int64) error {
	return ErrInvalidOperation
}
}
type storedExtent struct {
- cache blockCache
+ kc keepClient
locator string
size int
offset int
maxlen := se.length - int(off)
if len(p) > maxlen {
p = p[:maxlen]
- n, err = se.cache.ReadAt(se.locator, p, int(off)+se.offset)
+ n, err = se.kc.ReadAt(se.locator, p, int(off)+se.offset)
if err == nil {
err = io.EOF
}
return
}
- return se.cache.ReadAt(se.locator, p, int(off)+se.offset)
-}
-
-type blockCache interface {
- ReadAt(locator string, p []byte, off int) (n int, err error)
-}
-
-type keepBlockCache struct {
- kc keepClient
-}
-
-var scratch = make([]byte, 2<<26)
-
-func (kbc *keepBlockCache) ReadAt(locator string, p []byte, off int) (int, error) {
- return kbc.kc.ReadAt(locator, p, off)
+ return se.kc.ReadAt(se.locator, p, int(off)+se.offset)
}
func canonicalName(name string) string {
var manifestEscapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
-func manifestUnescapeSeq(seq string) string {
+func manifestUnescapeFunc(seq string) string {
if seq == `\\` {
return `\`
}
}
// manifestUnescape expands the backslash escape sequences matched by
// manifestEscapeSeq (`\\` and 3-digit octal escapes) in a manifest
// token, using manifestUnescapeFunc on each match.
func manifestUnescape(s string) string {
	return manifestEscapeSeq.ReplaceAllStringFunc(s, manifestUnescapeFunc)
}
+
// manifestEscapedChar matches every byte that must be escaped in a
// manifest token: anything other than ".", word characters, and "/".
var manifestEscapedChar = regexp.MustCompile(`[^\.\w/]`)

// manifestEscapeFunc renders one matched byte as a 3-digit octal
// backslash escape (e.g. " " becomes `\040`).
func manifestEscapeFunc(ch string) string {
	return fmt.Sprintf(`\%03o`, ch[0])
}

// manifestEscape escapes s for use as a stream or file name token in a
// manifest.
func manifestEscape(s string) string {
	return manifestEscapedChar.ReplaceAllStringFunc(s, manifestEscapeFunc)
}