import os
import sys
-
import llfuse
+from llfuse import FUSEError
import errno
import stat
import threading
import apiclient
import json
import logging
+import time
+import calendar
+
+_logger = logging.getLogger('arvados.arvados_fuse')
+
def convertTime(t):
    '''Parse an Arvados timestamp string ("YYYY-MM-DDTHH:MM:SSZ", UTC) and
    return it as seconds since the Unix epoch.'''
    parsed = time.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
    return calendar.timegm(parsed)
+
def sanitize_filename(dirty):
    '''Return a version of 'dirty' that is safe to use as a filename.

    Control characters (0x00-0x1f), DEL (0x7f) and the path separator '/'
    are removed, leading '-', '~' and whitespace are stripped, and trailing
    whitespace is stripped.  Returns None (meaning "skip this entry") when
    the input is None or nothing safe remains.

    http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
    '''
    if dirty is None:
        return None

    # Single-pass filter via join instead of quadratic "fn += c" string
    # concatenation; same characters kept/dropped as before.
    fn = ''.join(c for c in dirty
                 if not ('\x00' <= c <= '\x1f') and c != '\x7f' and c != '/')

    # strip leading - or ~ and leading/trailing whitespace
    stripped = fn.lstrip("-~ ").rstrip()
    if len(stripped) > 0:
        return stripped
    else:
        return None
-from time import time
-from llfuse import FUSEError
class FreshBase(object):
'''Base class for maintaining fresh/stale state to determine when to update.'''
    def __init__(self):
        # Start stale so the first access triggers an update().
        self._stale = True
        # When True, contents are refreshed by polling every _poll_time
        # seconds instead of waiting for an explicit invalidate().
        self._poll = False
        # Timestamp (seconds since epoch) of the last successful refresh.
        self._last_update = time.time()
        self._poll_time = 60
# Mark the value as stale
if self._stale:
return True
if self._poll:
- return (self._last_update + self._poll_time) < time()
+ return (self._last_update + self._poll_time) < time.time()
return False
    def fresh(self):
        # Mark the value as up to date and record when it was refreshed
        # (used by the staleness check to decide when the next poll is due).
        self._stale = False
        self._last_update = time.time()
+
    def ctime(self):
        # Default change time for objects that don't track timestamps.
        return 0
+
    def mtime(self):
        # Default modification time for objects that don't track timestamps.
        return 0
class File(FreshBase):
    '''Base for file objects.'''
    def __init__(self, parent_inode, _ctime=0, _mtime=0):
        super(File, self).__init__()
        # The inode number is assigned later, when the object is registered
        # with the inode table.
        self.inode = None
        self.parent_inode = parent_inode
        # Creation/modification times in seconds since epoch; 0 = unknown.
        self._ctime = _ctime
        self._mtime = _mtime
    def size(self):
        # Size in bytes; subclasses override.
        return 0
    def readfrom(self, off, size):
        # Return up to 'size' bytes starting at byte offset 'off';
        # subclasses override.
        return ''
    def ctime(self):
        return self._ctime
    def mtime(self):
        return self._mtime
+
class StreamReaderFile(File):
'''Wraps a StreamFileReader as a file.'''
    def __init__(self, parent_inode, reader, _ctime, _mtime):
        super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime)
        # reader: a StreamFileReader supplying the file's content.
        # NOTE(review): assumed to come from arvados.CollectionReader --
        # confirm against callers.
        self.reader = reader
def size(self):
return False
-class ObjectFile(File):
- '''Wraps a dict as a serialized json object.'''
-
- def __init__(self, parent_inode, contents):
- super(ObjectFile, self).__init__(parent_inode)
- self.contentsdict = contents
- self.uuid = self.contentsdict['uuid']
- self.contents = json.dumps(self.contentsdict, indent=4, sort_keys=True)
class StringFile(File):
    '''Wrap a simple string as a file'''
    def __init__(self, parent_inode, contents, _ctime, _mtime):
        super(StringFile, self).__init__(parent_inode, _ctime, _mtime)
        # The full content of the file, held in memory.
        self.contents = contents
    def size(self):
        return len(self.contents)
    def readfrom(self, off, size):
        # Serve a byte range straight out of the in-memory string.
        return self.contents[off:(off+size)]
+
class ObjectFile(StringFile):
    '''Wrap a dict as a serialized json object.'''

    def __init__(self, parent_inode, contents):
        # Derive ctime/mtime from the API record's timestamps when present;
        # fall back to 0 for records without them.
        stamps = []
        for field in ('created_at', 'modified_at'):
            stamps.append(convertTime(contents[field]) if field in contents else 0)
        body = json.dumps(contents, indent=4, sort_keys=True) + "\n"
        super(ObjectFile, self).__init__(parent_inode, body, stamps[0], stamps[1])
        self.contentsdict = contents
        self.uuid = self.contentsdict['uuid']
class Directory(FreshBase):
try:
self.update()
except apiclient.errors.HttpError as e:
- logging.debug(e)
+ _logger.debug(e)
def __getitem__(self, item):
self.checkupdate()
return k in self._entries
    def merge(self, items, fn, same, new_entry):
        '''Helper method for updating the contents of the directory.  Takes a list
        describing the new contents of the directory, reuse entries that are
        the same in both the old and new lists, create new entries, and delete
        old entries missing from the new list.

        items: iterable with new directory contents

        fn: function to take an entry in 'items' and return the desired file or
        directory name, or None if this entry should be skipped

        same: function to compare an existing entry (a File or Directory
        object) with an entry in the items list to determine whether to keep
        the existing entry.

        new_entry: function to create a new directory entry (File or Directory
        object) from an entry in the items list.
        '''
        oldentries = self._entries
        self._entries = {}
        for i in items:
            # Names are sanitized; a None result means "skip this item".
            name = sanitize_filename(fn(i))
            if name:
                if name in oldentries and same(oldentries[name], i):
                    # move existing directory entry over
                    self._entries[name] = oldentries[name]
                    del oldentries[name]
                else:
                    # create new directory entry
                    ent = new_entry(i)
                    if ent is not None:
                        self._entries[name] = self.inodes.add_entry(ent)

        # delete any other directory entries that were not found in 'items'
        for i in oldentries:
            llfuse.invalidate_entry(self.inode, str(i))
            self.inodes.del_entry(oldentries[i])
        self.fresh()
+
+ def clear(self):
+ '''Delete all entries'''
+ oldentries = self._entries
+ self._entries = {}
for n in oldentries:
+ if isinstance(n, Directory):
+ n.clear()
llfuse.invalidate_entry(self.inode, str(n))
self.inodes.del_entry(oldentries[n])
- self.fresh()
+ self.invalidate()
class CollectionDirectory(Directory):
    '''Represents the root of a directory tree holding a collection.'''

    def __init__(self, parent_inode, inodes, api, collection_locator):
        super(CollectionDirectory, self).__init__(parent_inode)
        self.inodes = inodes
        self.api = api
        # Either a collection uuid or a portable data hash; see same()/update().
        self.collection_locator = collection_locator
        # Lazily-created special files, see __getitem__.
        self.manifest_text_file = None
        self.pdh_file = None
        # Most recent API record for this collection; None until update() succeeds.
        self.collection_object = None
    def same(self, i):
        return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator

    def update(self):
        try:
            # A locator that looks like a portable data hash is
            # content-addressed, so once fetched the contents can never
            # change -- skip the API round trip.
            if self.collection_object is not None and re.match(r'^[a-f0-9]{32}', self.collection_locator):
                return True
            #with llfuse.lock_released:
            new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
            if "portable_data_hash" not in new_collection_object:
                new_collection_object["portable_data_hash"] = new_collection_object["uuid"]

            # Only rebuild the directory tree when the content hash changed.
            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
                self.collection_object = new_collection_object

                # Keep the special files (if already materialized) in sync
                # with the new record.
                if self.manifest_text_file is not None:
                    self.manifest_text_file.contents = self.collection_object["manifest_text"]
                    self.manifest_text_file._ctime = self.ctime()
                    self.manifest_text_file._mtime = self.mtime()
                if self.pdh_file is not None:
                    self.pdh_file.contents = self.collection_object["portable_data_hash"]
                    self.pdh_file._ctime = self.ctime()
                    self.pdh_file._mtime = self.mtime()

                # Drop the old tree and rebuild it from the manifest streams.
                self.clear()
                collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
                for s in collection.all_streams():
                    cwd = self
                    for part in s.name().split('/'):
                        if part != '' and part != '.':
                            partname = sanitize_filename(part)
                            if partname not in cwd._entries:
                                cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
                            cwd = cwd._entries[partname]
                    for k, v in s.files().items():
                        cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime()))
            self.fresh()
            return True
        except Exception as detail:
            _logger.error("arv-mount %s: error", self.collection_locator)
            _logger.exception(detail)
            return False

    def __getitem__(self, item):
        # NOTE(review): if update() has never succeeded, collection_object is
        # still None and the special-file branches below will raise -- verify
        # callers always see a successful update first.
        self.checkupdate()
        if item == '.manifest_text':
            # Materialize the special file on first access.
            if self.manifest_text_file is None:
                self.manifest_text_file = StringFile(self.inode, self.collection_object["manifest_text"], self.ctime(), self.mtime())
                self.inodes.add_entry(self.manifest_text_file)
            return self.manifest_text_file
        elif item == '.portable_data_hash':
            if self.pdh_file is None:
                self.pdh_file = StringFile(self.inode, self.collection_object["portable_data_hash"], self.ctime(), self.mtime())
                self.inodes.add_entry(self.pdh_file)
            return self.pdh_file
        else:
            return super(CollectionDirectory, self).__getitem__(item)

    def __contains__(self, k):
        # The special entries always "exist"; they are synthesized on demand.
        if k in ('.manifest_text', '.portable_data_hash'):
            return True
        else:
            return super(CollectionDirectory, self).__contains__(k)

    def ctime(self):
        self.checkupdate()
        return convertTime(self.collection_object["created_at"])

    def mtime(self):
        self.checkupdate()
        return convertTime(self.collection_object["modified_at"])
+
class MagicDirectory(Directory):
'''A special directory that logically contains the set of all extant keep
locators. When a file is referenced by lookup(), it is tested to see if it
to readdir().
'''
    def __init__(self, parent_inode, inodes, api):
        super(MagicDirectory, self).__init__(parent_inode)
        self.inodes = inodes
        # API client used to resolve locators into collections on demand.
        self.api = api
    def __contains__(self, k):
        if k in self._entries:
            return True
        try:
            # Try to resolve 'k' as a collection locator; if it loads,
            # memoize it as a real directory entry as a side effect.
            e = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, self.api, k))
            if e.update():
                self._entries[k] = e
                return True
            else:
                return False
        except Exception as e:
            # Best-effort: an unresolvable locator simply "does not exist".
            _logger.debug('arv-mount exception keep %s', e)
            return False
def __getitem__(self, item):
else:
raise KeyError("No collection with id " + item)
-class TagsDirectory(Directory):
class RecursiveInvalidateDirectory(Directory):
    '''Directory that propagates invalidate() to all of its entries.'''
    def invalidate(self):
        # Only the root node acquires the llfuse lock explicitly: the
        # top-level call may originate outside a FUSE request context,
        # while recursive calls on children already run under the lock.
        # NOTE(review): assumption based on the ROOT_INODE guard -- confirm
        # against callers.
        if self.inode == llfuse.ROOT_INODE:
            llfuse.lock.acquire()
        try:
            super(RecursiveInvalidateDirectory, self).invalidate()
            for a in self._entries:
                self._entries[a].invalidate()
        except Exception as e:
            _logger.exception(e)
        finally:
            if self.inode == llfuse.ROOT_INODE:
                llfuse.lock.release()
+
+class TagsDirectory(RecursiveInvalidateDirectory):
'''A special directory that contains as subdirectories all tags visible to the user.'''
    def __init__(self, parent_inode, inodes, api, poll_time=60):
        super(TagsDirectory, self).__init__(parent_inode)
        self.inodes = inodes
        self.api = api
        # Event subscription is disabled for now; refresh by polling instead.
        #try:
        #    arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
        #except:
        self._poll = True
        self._poll_time = poll_time
    def update(self):
        # Fetch the distinct set of tag names visible to this user.
        tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
        if "items" in tags:
            # NOTE(review): the 'same' comparison 'a.tag == i' compares a
            # TagDirectory's tag to the whole item record; looks like it
            # should be i['name'] -- verify against TagDirectory.
            self.merge(tags['items'],
                       lambda i: i['name'] if 'name' in i else i['uuid'],
                       lambda a, i: a.tag == i,
                       lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time))
self.merge(taggedcollections['items'],
lambda i: i['head_uuid'],
lambda a, i: a.collection_locator == i['head_uuid'],
- lambda i: CollectionDirectory(self.inode, self.inodes, i['head_uuid']))
+ lambda i: CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid']))
class ProjectDirectory(RecursiveInvalidateDirectory):
    '''A special directory that contains the contents of a project.'''

    def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60):
        super(ProjectDirectory, self).__init__(parent_inode)
        self.inodes = inodes
        self.api = api
        # The group or user record backing this project; refreshed by update().
        self.project_object = project_object
        self.uuid = project_object['uuid']
def createDirectory(self, i):
- if re.match(r'[0-9a-f]{32}\+\d+', i['uuid']):
- return CollectionDirectory(self.inode, self.inodes, i['uuid'])
+ if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']):
+ return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid'])
elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
- return GroupDirectory(self.parent_inode, self.inodes, self.api, i, self._poll, self._poll_time)
+ return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time)
+ elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+ return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
+ #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']):
+ # return None
elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
return ObjectFile(self.parent_inode, i)
- return None
+ else:
+ return None
    def update(self):
        def namefn(i):
            # Choose the directory-entry name for record 'i', or None to skip.
            if 'name' in i:
                if i['name'] is None:
                    return None
                elif re.match(r'[a-z0-9]{5}-(4zz18|j7d0g)-[a-z0-9]{15}', i['uuid']):
                    # collection or subproject
                    return i['name']
                elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
                    # name link
                    return i['name']
                elif 'kind' in i and i['kind'].startswith('arvados#'):
                    # something else
                    return "{}.{}".format(i['name'], i['kind'][8:])
            else:
                return None

        def samefn(a, i):
            # Decide whether existing entry 'a' still matches record 'i'.
            if isinstance(a, CollectionDirectory):
                return a.collection_locator == i['uuid']
            elif isinstance(a, ProjectDirectory):
                return a.uuid == i['uuid']
            elif isinstance(a, ObjectFile):
                return a.uuid == i['uuid'] and not a.stale()
            return False

        #with llfuse.lock_released:
        # Refresh our own backing record (a group project or a user "home").
        if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
            self.project_object = self.api.groups().get(uuid=self.uuid).execute()
        elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid):
            self.project_object = self.api.users().get(uuid=self.uuid).execute()

        contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
        # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
        contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])

        self.merge(contents,
                   namefn,
                   samefn,
                   self.createDirectory)
    def ctime(self):
        # Creation time from the project record, or 0 if the field is absent.
        return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0
+
    def mtime(self):
        # Modification time from the project record, or 0 if the field is absent.
        return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object else 0
+
+
+
class SharedDirectory(RecursiveInvalidateDirectory):
    '''A special directory that represents users or groups who have shared projects with me.'''

    def __init__(self, parent_inode, inodes, api, exclude, poll=False, poll_time=60):
        # NOTE(review): 'exclude' is accepted for interface compatibility but
        # is not used by this implementation -- confirm intended behavior.
        super(SharedDirectory, self).__init__(parent_inode)
        self.current_user = api.users().current().execute()
        self.inodes = inodes
        self.api = api

        # Event subscription is disabled for now; always refresh by polling.
        # try:
        #     arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
        # except:
        self._poll = True
        self._poll_time = poll_time

    def update(self):
        '''Rebuild the mapping of owner display name -> owner record for
        projects shared with the current user.'''
        #with llfuse.lock_released:
        all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
        objects = {}
        for ob in all_projects:
            objects[ob['uuid']] = ob

        # "Root" shared projects: not owned by me, and whose owner is not
        # itself one of the listed projects (i.e. a user or outside group).
        roots = []
        root_owners = {}
        for ob in all_projects:
            if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
                roots.append(ob)
                root_owners[ob['owner_uuid']] = True

        # Resolve the owners into user/group records.
        #with llfuse.lock_released:
        lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]])
        lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]])

        # (Removed dead locals 'users'/'groups': they were assigned empty
        # dicts and never used.)
        for l in lusers:
            objects[l["uuid"]] = l
        for l in lgroups:
            objects[l["uuid"]] = l

        # Visible name -> record.  Groups contribute their 'name'; users
        # contribute "First Last".
        contents = {}
        for r in root_owners:
            if r in objects:
                obr = objects[r]
                if "name" in obr:
                    contents[obr["name"]] = obr
                if "first_name" in obr:
                    contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr

        # Owners we could not resolve: fall back to listing the shared
        # project itself under its own name.
        for r in roots:
            if r['owner_uuid'] not in objects:
                contents[r['name']] = r

        try:
            self.merge(contents.items(),
                       lambda i: i[0],
                       lambda a, i: a.uuid == i[1]['uuid'],
                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, i[1], poll=self._poll, poll_time=self._poll_time))
        except Exception as e:
            _logger.exception(e)
+
class FileHandle(object):
'''Connects a numeric file handle to a File or Directory object that has
entry.st_size = e.size()
- entry.st_blksize = 1024
- entry.st_blocks = e.size()/1024
- if e.size()/1024 != 0:
+ entry.st_blksize = 512
+ entry.st_blocks = (e.size()/512)
+ if e.size()/512 != 0:
entry.st_blocks += 1
entry.st_atime = 0
- entry.st_mtime = 0
- entry.st_ctime = 0
+ entry.st_mtime = e.mtime()
+ entry.st_ctime = e.ctime()
return entry
def lookup(self, parent_inode, name):
- logging.debug("arv-mount lookup: parent_inode %i name %s", parent_inode, name)
+ _logger.debug("arv-mount lookup: parent_inode %i name %s",
+ parent_inode, name)
inode = None
if name == '.':
return fh
def read(self, fh, off, size):
- logging.debug("arv-mount read %i %i %i", fh, off, size)
+ _logger.debug("arv-mount read %i %i %i", fh, off, size)
if fh in self._filehandles:
handle = self._filehandles[fh]
else:
del self._filehandles[fh]
def opendir(self, inode):
- logging.debug("arv-mount opendir: inode %i", inode)
+ _logger.debug("arv-mount opendir: inode %i", inode)
if inode in self.inodes:
p = self.inodes[inode]
return fh
def readdir(self, fh, off):
- logging.debug("arv-mount readdir: fh %i off %i", fh, off)
+ _logger.debug("arv-mount readdir: fh %i off %i", fh, off)
if fh in self._filehandles:
handle = self._filehandles[fh]
else:
raise llfuse.FUSEError(errno.EBADF)
- logging.debug("arv-mount handle.entry %s", handle.entry)
+ _logger.debug("arv-mount handle.entry %s", handle.entry)
e = off
while e < len(handle.entry):
def statfs(self):
st = llfuse.StatvfsData()
- st.f_bsize = 1024 * 1024
+ st.f_bsize = 64 * 1024
st.f_blocks = 0
st.f_files = 0