X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2957b917aaaefc1485e1c5293d413f0931b3030a..HEAD:/sdk/go/arvados/fs_site.go diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 82114e2ea9..d4f0241682 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -5,6 +5,7 @@ package arvados import ( + "net/http" "os" "strings" "sync" @@ -16,14 +17,22 @@ type CustomFileSystem interface { MountByID(mount string) MountProject(mount, uuid string) MountUsers(mount string) + ForwardSlashNameSubstitution(string) } type customFileSystem struct { fileSystem root *vdirnode + thr *throttle staleThreshold time.Time staleLock sync.Mutex + + forwardSlashNameSubstitution string + + byID map[string]inode + byIDLock sync.Mutex + byIDRoot *treenode } func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { @@ -33,9 +42,10 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { fileSystem: fileSystem{ fsBackend: keepBackend{apiClient: c, keepClient: kc}, root: root, + thr: newThrottle(concurrentWriters), }, } - root.inode = &treenode{ + root.treenode = treenode{ fs: fs, parent: root, fileinfo: fileinfo{ @@ -45,13 +55,26 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { }, inodes: make(map[string]inode), } + fs.byID = map[string]inode{} + fs.byIDRoot = &treenode{ + fs: fs, + parent: root, + inodes: make(map[string]inode), + fileinfo: fileinfo{ + name: "_internal_by_id", + modTime: time.Now(), + mode: 0755 | os.ModeDir, + }, + } return fs } func (fs *customFileSystem) MountByID(mount string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() + fs.root.treenode.Child(mount, func(inode) (inode, error) { return &vdirnode{ - inode: &treenode{ + treenode: treenode{ fs: fs, parent: fs.root, inodes: make(map[string]inode), @@ -61,24 +84,28 @@ func (fs *customFileSystem) MountByID(mount string) { mode: 0755 | os.ModeDir, }, }, - create: fs.mountByID, + create: fs.newCollectionOrProjectHardlink, }, nil }) } func (fs *customFileSystem) MountProject(mount, uuid string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { - return fs.newProjectNode(fs.root, mount, uuid), nil + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() + fs.root.treenode.Child(mount, func(inode) (inode, error) { + return fs.newProjectDir(fs.root, mount, uuid, nil), nil }) } func (fs *customFileSystem) MountUsers(mount string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() + fs.root.treenode.Child(mount, func(inode) (inode, error) { return &lookupnode{ stale: fs.Stale, loadOne: fs.usersLoadOne, loadAll: fs.usersLoadAll, - inode: &treenode{ + treenode: treenode{ fs: fs, parent: fs.root, inodes: make(map[string]inode), @@ -92,6 +119,14 @@ func (fs *customFileSystem) MountUsers(mount string) { }) } +func (fs *customFileSystem) ForwardSlashNameSubstitution(repl string) { + fs.forwardSlashNameSubstitution = repl +} + +func (fs *customFileSystem) MemorySize() int64 { + return fs.fileSystem.MemorySize() + fs.byIDRoot.MemorySize() +} + // SiteFileSystem returns a FileSystem that maps collections and other // Arvados objects onto a filesystem layout. // @@ -106,10 +141,7 @@ func (c *Client) SiteFileSystem(kc keepClient) CustomFileSystem { } func (fs *customFileSystem) Sync() error { - fs.staleLock.Lock() - defer fs.staleLock.Unlock() - fs.staleThreshold = time.Now() - return nil + return fs.byIDRoot.Sync() } // Stale returns true if information obtained at time t should be @@ -124,67 +156,177 @@ func (fs *customFileSystem) newNode(name string, perm os.FileMode, modTime time. return nil, ErrInvalidOperation } -func (fs *customFileSystem) mountByID(parent inode, id string) inode { +func (fs *customFileSystem) newCollectionOrProjectHardlink(parent inode, id string) (inode, error) { if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) { - return fs.mountCollection(parent, id) - } else if strings.Contains(id, "-j7d0g-") { - return fs.newProjectNode(fs.root, id, id) + node, err := fs.collectionSingleton(id) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + return &hardlink{inode: node, parent: parent, name: id}, nil + } else if strings.Contains(id, "-j7d0g-") || strings.Contains(id, "-tpzed-") { + fs.byIDLock.Lock() + node := fs.byID[id] + fs.byIDLock.Unlock() + if node == nil { + // Look up the project synchronously before + // calling projectSingleton (otherwise we + // wouldn't detect a nonexistent project until + // it's too late to return ErrNotExist). + proj, err := fs.getProject(id) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + node = fs.projectSingleton(id, proj) + } + return &hardlink{inode: node, parent: parent, name: id}, nil } else { - return nil + return nil, nil } } -func (fs *customFileSystem) mountCollection(parent inode, id string) inode { - var coll Collection - err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, nil) - if err != nil { - return nil +func (fs *customFileSystem) projectSingleton(uuid string, proj *Group) inode { + fs.byIDLock.Lock() + defer fs.byIDLock.Unlock() + if n := fs.byID[uuid]; n != nil { + return n } - cfs, err := coll.FileSystem(fs, fs) - if err != nil { - return nil + name := uuid + if name == "" { + // special case uuid=="" implements the "home project" + // (owner_uuid == current user uuid) + name = "home" } - root := cfs.rootnode() - root.SetParent(parent, id) - return root -} - -func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode { - return &lookupnode{ + var projLoading sync.Mutex + n := &lookupnode{ stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, loadAll: func(parent inode) ([]inode, error) { return fs.projectsLoadAll(parent, uuid) }, - inode: &treenode{ + treenode: treenode{ fs: fs, - parent: root, + parent: fs.byIDRoot, inodes: make(map[string]inode), fileinfo: fileinfo{ name: name, modTime: time.Now(), mode: 0755 | os.ModeDir, + sys: func() interface{} { + projLoading.Lock() + defer projLoading.Unlock() + if proj != nil { + return proj + } + g, err := fs.getProject(uuid) + if err != nil { + return err + } + proj = g + return proj + }, }, }, } + fs.byID[uuid] = n + return n } -// vdirnode wraps an inode by ignoring any requests to add/replace -// children, and calling a create() func when a non-existing child is -// looked up. +func (fs *customFileSystem) getProject(uuid string) (*Group, error) { + var g Group + err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil) + if statusErr, ok := err.(interface{ HTTPStatus() int }); ok && statusErr.HTTPStatus() == http.StatusNotFound { + return nil, os.ErrNotExist + } else if err != nil { + return nil, err + } + return &g, err +} + +func (fs *customFileSystem) collectionSingleton(id string) (inode, error) { + // Return existing singleton, if we have it + fs.byIDLock.Lock() + existing := fs.byID[id] + fs.byIDLock.Unlock() + if existing != nil { + return existing, nil + } + + coll, err := fs.getCollection(id) + if err != nil { + return nil, err + } + newfs, err := coll.FileSystem(fs, fs) + if err != nil { + return nil, err + } + cfs := newfs.(*collectionFileSystem) + cfs.SetParent(fs.byIDRoot, id) + + // Check again in case another goroutine has added a node to + // fs.byID since we checked above. + fs.byIDLock.Lock() + defer fs.byIDLock.Unlock() + if existing = fs.byID[id]; existing != nil { + // Other goroutine won the race. Discard the node we + // just made, and return the race winner. + return existing, nil + } + // We won the race. Save the new node in fs.byID and + // fs.byIDRoot. + fs.byID[id] = cfs + fs.byIDRoot.Lock() + defer fs.byIDRoot.Unlock() + fs.byIDRoot.Child(id, func(inode) (inode, error) { return cfs, nil }) + return cfs, nil +} + +func (fs *customFileSystem) getCollection(id string) (*Collection, error) { + var coll Collection + err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+id, nil, nil) + if statusErr, ok := err.(interface{ HTTPStatus() int }); ok && statusErr.HTTPStatus() == http.StatusNotFound { + return nil, os.ErrNotExist + } else if err != nil { + return nil, err + } + if len(id) != 27 { + // This means id is a PDH, and controller/railsapi + // returned one of (possibly) many collections with + // that PDH. Even if controller returns more fields + // besides PDH and manifest text (which are equal for + // all matching collections), we don't want to expose + // them (e.g., through Sys()). + coll = Collection{ + PortableDataHash: coll.PortableDataHash, + ManifestText: coll.ManifestText, + } + } + return &coll, nil +} + +// vdirnode wraps an inode by rejecting (with ErrInvalidOperation) +// calls that add/replace children directly, instead calling a +// create() func when a non-existing child is looked up. // // create() can return either a new node, which will be added to the // treenode, or nil for ENOENT. type vdirnode struct { - inode - create func(parent inode, name string) inode + treenode + create func(parent inode, name string) (inode, error) } func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inode, error) { - return vn.inode.Child(name, func(existing inode) (inode, error) { + return vn.treenode.Child(name, func(existing inode) (inode, error) { if existing == nil && vn.create != nil { - existing = vn.create(vn, name) - if existing != nil { - existing.SetParent(vn, name) - vn.inode.(*treenode).fileinfo.modTime = time.Now() + newnode, err := vn.create(vn, name) + if err != nil { + return nil, err + } + if newnode != nil { + newnode.SetParent(vn, name) + existing = newnode + vn.treenode.fileinfo.modTime = time.Now() } } if replace == nil { @@ -192,9 +334,63 @@ func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inod } else if tryRepl, err := replace(existing); err != nil { return existing, err } else if tryRepl != existing { - return existing, ErrInvalidArgument + return existing, ErrInvalidOperation } else { return existing, nil } }) } + +// A hardlink can be used to mount an existing node at an additional +// point in the same filesystem. +type hardlink struct { + inode + parent inode + name string +} + +// If the wrapped inode is a filesystem, rootnode returns the wrapped +// fs's rootnode, otherwise inode itself. This allows +// (*fileSystem)Rename() to lock the root node of a hardlink-wrapped +// filesystem. +func (hl *hardlink) rootnode() inode { + if node, ok := hl.inode.(interface{ rootnode() inode }); ok { + return node.rootnode() + } else { + return hl.inode + } +} + +func (hl *hardlink) Sync() error { + if node, ok := hl.inode.(syncer); ok { + return node.Sync() + } else { + return ErrInvalidOperation + } +} + +func (hl *hardlink) SetParent(parent inode, name string) { + hl.Lock() + defer hl.Unlock() + hl.parent = parent + hl.name = name +} + +func (hl *hardlink) Parent() inode { + hl.RLock() + defer hl.RUnlock() + return hl.parent +} + +func (hl *hardlink) FileInfo() os.FileInfo { + fi := hl.inode.FileInfo() + if fi, ok := fi.(fileinfo); ok { + fi.name = hl.name + return fi + } + return fi +} + +func (hl *hardlink) MemorySize() int64 { + return 64 + int64(len(hl.name)) +}