X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/cd41c68c201f5307ec3372fa141842e1be695a98..8b90f80efca772efd2697ffc70d7809c32564171:/services/fuse/arvados_fuse/__init__.py diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index a80962b41d..71c4ee5a2c 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -5,7 +5,6 @@ import os import sys import llfuse -from llfuse import FUSEError import errno import stat import threading @@ -17,31 +16,75 @@ import apiclient import json import logging import time +import _strptime import calendar +import threading +import itertools + +from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern _logger = logging.getLogger('arvados.arvados_fuse') +# Match any character which FUSE or Linux cannot accommodate as part +# of a filename. (If present in a collection filename, they will +# appear as underscores in the fuse mount.) +_disallowed_filename_characters = re.compile('[\x00/]') + +class SafeApi(object): + """Threadsafe wrapper for API object. + + This stores and returns a different api object per thread, because + httplib2 which underlies apiclient is not threadsafe. + """ + + def __init__(self, config): + self.host = config.get('ARVADOS_API_HOST') + self.api_token = config.get('ARVADOS_API_TOKEN') + self.insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE') + self.local = threading.local() + self.block_cache = arvados.KeepBlockCache() + + def localapi(self): + if 'api' not in self.local.__dict__: + self.local.api = arvados.api( + version='v1', + host=self.host, token=self.api_token, insecure=self.insecure) + return self.local.api + + def localkeep(self): + if 'keep' not in self.local.__dict__: + self.local.keep = arvados.KeepClient(api_client=self.localapi(), block_cache=self.block_cache) + return self.local.keep + + def __getattr__(self, name): + # Proxy nonexistent attributes to the local API client. + try: + return getattr(self.localapi(), name) + except AttributeError: + return super(SafeApi, self).__getattr__(name) + + def convertTime(t): - return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ")) + """Parse Arvados timestamp to unix time.""" + if not t: + return 0 + try: + return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ")) + except (TypeError, ValueError): + return 0 def sanitize_filename(dirty): - # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html + '''Replace disallowed filename characters with harmless "_".''' if dirty is None: return None - - fn = "" - for c in dirty: - if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/': - # skip control characters and / - continue - fn += c - - # strip leading - or ~ and leading/trailing whitespace - stripped = fn.lstrip("-~ ").rstrip() - if len(stripped) > 0: - return stripped + elif dirty == '': + return '_' + elif dirty == '.': + return '_' + elif dirty == '..': + return '__' else: - return None + return _disallowed_filename_characters.sub('_', dirty) class FreshBase(object): @@ -50,39 +93,35 @@ class FreshBase(object): self._stale = True self._poll = False self._last_update = time.time() + self._atime = time.time() self._poll_time = 60 # Mark the value as stale def invalidate(self): self._stale = True - # Test if the entries dict is stale + # Test if the entries dict is stale. def stale(self): if self._stale: return True if self._poll: - return (self._last_update + self._poll_time) < time.time() + return (self._last_update + self._poll_time) < self._atime return False def fresh(self): self._stale = False self._last_update = time.time() - def ctime(self): - return 0 - - def mtime(self): - return 0 - + def atime(self): + return self._atime class File(FreshBase): '''Base for file objects.''' - def __init__(self, parent_inode, _ctime=0, _mtime=0): + def __init__(self, parent_inode, _mtime=0): super(File, self).__init__() self.inode = None self.parent_inode = parent_inode - self._ctime = _ctime self._mtime = _mtime def size(self): @@ -91,9 +130,6 @@ class File(FreshBase): def readfrom(self, off, size): return '' - def ctime(self): - return self._ctime - def mtime(self): return self._mtime @@ -101,8 +137,8 @@ class File(FreshBase): class StreamReaderFile(File): '''Wraps a StreamFileReader as a file.''' - def __init__(self, parent_inode, reader, _ctime, _mtime): - super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime) + def __init__(self, parent_inode, reader, _mtime): + super(StreamReaderFile, self).__init__(parent_inode, _mtime) self.reader = reader def size(self): @@ -117,23 +153,28 @@ class StreamReaderFile(File): class StringFile(File): '''Wrap a simple string as a file''' - def __init__(self, parent_inode, contents, _ctime, _mtime): - super(StringFile, self).__init__(parent_inode, _ctime, _mtime) + def __init__(self, parent_inode, contents, _mtime): + super(StringFile, self).__init__(parent_inode, _mtime) self.contents = contents def size(self): return len(self.contents) def readfrom(self, off, size): - return self.contents[off:(off+size)] + return self.contents[off:(off+size)] + class ObjectFile(StringFile): '''Wrap a dict as a serialized json object.''' - def __init__(self, parent_inode, contents): - super(ObjectFile, self).__init__(parent_inode, json.dumps(self.contentsdict, indent=4, sort_keys=True)) - self.contentsdict = contents - self.uuid = self.contentsdict['uuid'] + def __init__(self, parent_inode, obj): + super(ObjectFile, self).__init__(parent_inode, "", 0) + self.uuid = obj['uuid'] + self.update(obj) + + def update(self, obj): + self._mtime = convertTime(obj['modified_at']) if 'modified_at' in obj else 0 + self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n" class Directory(FreshBase): @@ -151,6 +192,7 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self._entries = {} + self._mtime = time.time() # Overriden by subclasses to implement logic to update the entries dict # when the directory is stale @@ -207,6 +249,7 @@ class Directory(FreshBase): oldentries = self._entries self._entries = {} + changed = False for i in items: name = sanitize_filename(fn(i)) if name: @@ -219,11 +262,17 @@ class Directory(FreshBase): ent = new_entry(i) if ent is not None: self._entries[name] = self.inodes.add_entry(ent) + changed = True # delete any other directory entries that were not in found in 'items' - for i in oldentries: + for i in oldentries: llfuse.invalidate_entry(self.inode, str(i)) self.inodes.del_entry(oldentries[i]) + changed = True + + if changed: + self._mtime = time.time() + self.fresh() def clear(self): @@ -235,91 +284,118 @@ class Directory(FreshBase): n.clear() llfuse.invalidate_entry(self.inode, str(n)) self.inodes.del_entry(oldentries[n]) + llfuse.invalidate_inode(self.inode) self.invalidate() + def mtime(self): + return self._mtime + class CollectionDirectory(Directory): '''Represents the root of a directory tree holding a collection.''' - def __init__(self, parent_inode, inodes, api, collection_locator): + def __init__(self, parent_inode, inodes, api, num_retries, collection): super(CollectionDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api - self.collection_locator = collection_locator - self.manifest_text_file = None - self.pdh_file = None + self.num_retries = num_retries + self.collection_object_file = None self.collection_object = None + if isinstance(collection, dict): + self.collection_locator = collection['uuid'] + self._mtime = convertTime(collection.get('modified_at')) + else: + self.collection_locator = collection + self._mtime = 0 def same(self, i): return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator + # Used by arv-web.py to switch the contents of the CollectionDirectory + def change_collection(self, new_locator): + """Switch the contents of the CollectionDirectory. Must be called with llfuse.lock held.""" + self.collection_locator = new_locator + self.collection_object = None + self.update() + + def new_collection(self, new_collection_object, coll_reader): + self.collection_object = new_collection_object + + self._mtime = convertTime(self.collection_object.get('modified_at')) + + if self.collection_object_file is not None: + self.collection_object_file.update(self.collection_object) + + self.clear() + for s in coll_reader.all_streams(): + cwd = self + for part in s.name().split('/'): + if part != '' and part != '.': + partname = sanitize_filename(part) + if partname not in cwd._entries: + cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode)) + cwd = cwd._entries[partname] + for k, v in s.files().items(): + cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime())) + def update(self): try: - new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute() - if "portable_data_hash" not in new_collection_object: - new_collection_object["portable_data_hash"] = new_collection_object["uuid"] + if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator): + return True + + if self.collection_locator is None: + self.fresh() + return True + + with llfuse.lock_released: + coll_reader = arvados.CollectionReader( + self.collection_locator, self.api, self.api.localkeep(), + num_retries=self.num_retries) + new_collection_object = coll_reader.api_response() or {} + # If the Collection only exists in Keep, there will be no API + # response. Fill in the fields we need. + if 'uuid' not in new_collection_object: + new_collection_object['uuid'] = self.collection_locator + if "portable_data_hash" not in new_collection_object: + new_collection_object["portable_data_hash"] = new_collection_object["uuid"] + if 'manifest_text' not in new_collection_object: + new_collection_object['manifest_text'] = coll_reader.manifest_text() + coll_reader.normalize() + # end with llfuse.lock_released, re-acquire lock if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]: - self.collection_object = new_collection_object - - if self.manifest_text_file is not None: - self.manifest_text_file.contents = self.collection_object["manifest_text"] - self.manifest_text_file._ctime = self.ctime() - self.manifest_text_file._mtime = self.mtime() - if self.pdh_file is not None: - self.pdh_file.contents = self.collection_object["portable_data_hash"] - self.pdh_file._ctime = self.ctime() - self.pdh_file._mtime = self.mtime() - - self.clear() - collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api) - for s in collection.all_streams(): - cwd = self - for part in s.name().split('/'): - if part != '' and part != '.': - partname = sanitize_filename(part) - if partname not in cwd._entries: - cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode)) - cwd = cwd._entries[partname] - for k, v in s.files().items(): - cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime())) + self.new_collection(new_collection_object, coll_reader) + self.fresh() return True - except Exception as detail: - _logger.error("arv-mount %s: error", self.collection_locator) - _logger.exception(detail) - return False + except arvados.errors.NotFoundError: + _logger.exception("arv-mount %s: error", self.collection_locator) + except arvados.errors.ArgumentError as detail: + _logger.warning("arv-mount %s: error %s", self.collection_locator, detail) + if self.collection_object is not None and "manifest_text" in self.collection_object: + _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) + except Exception: + _logger.exception("arv-mount %s: error", self.collection_locator) + if self.collection_object is not None and "manifest_text" in self.collection_object: + _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) + return False def __getitem__(self, item): self.checkupdate() - if item == '.manifest_text': - if self.manifest_text_file is None: - self.manifest_text_file = StringFile(self.inode, self.collection_object["manifest_text"], self.ctime(), self.mtime()) - self.inodes.add_entry(self.manifest_text_file) - return self.manifest_text_file - elif item == '.portable_data_hash': - if self.pdh_file is None: - self.pdh_file = StringFile(self.inode, self.collection_object["portable_data_hash"], self.ctime(), self.mtime()) - print self.ctime - print self.pdh_file._ctime - self.inodes.add_entry(self.pdh_file) - return self.pdh_file + if item == '.arvados#collection': + if self.collection_object_file is None: + self.collection_object_file = ObjectFile(self.inode, self.collection_object) + self.inodes.add_entry(self.collection_object_file) + return self.collection_object_file else: return super(CollectionDirectory, self).__getitem__(item) def __contains__(self, k): - if k == '.manifest_text' or '.portable_data_hash': + if k == '.arvados#collection': return True else: return super(CollectionDirectory, self).__contains__(k) - def ctime(self): - self.checkupdate() - return convertTime(self.collection_object["created_at"]) - - def mtime(self): - self.checkupdate() - return convertTime(self.collection_object["modified_at"]) class MagicDirectory(Directory): '''A special directory that logically contains the set of all extant keep @@ -331,16 +407,45 @@ class MagicDirectory(Directory): to readdir(). ''' - def __init__(self, parent_inode, inodes, api): + README_TEXT = ''' +This directory provides access to Arvados collections as subdirectories listed +by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in +the form '1234567890abcdefghijklmnopqrstuv+123'). + +Note that this directory will appear empty until you attempt to access a +specific collection subdirectory (such as trying to 'cd' into it), at which +point the collection will actually be looked up on the server and the directory +will appear if it exists. +'''.lstrip() + + def __init__(self, parent_inode, inodes, api, num_retries): super(MagicDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api + self.num_retries = num_retries + + def __setattr__(self, name, value): + super(MagicDirectory, self).__setattr__(name, value) + # When we're assigned an inode, add a README. + if ((name == 'inode') and (self.inode is not None) and + (not self._entries)): + self._entries['README'] = self.inodes.add_entry( + StringFile(self.inode, self.README_TEXT, time.time())) + # If we're the root directory, add an identical by_id subdirectory. + if self.inode == llfuse.ROOT_INODE: + self._entries['by_id'] = self.inodes.add_entry(MagicDirectory( + self.inode, self.inodes, self.api, self.num_retries)) def __contains__(self, k): if k in self._entries: return True + + if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k): + return False + try: - e = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, self.api, k)) + e = self.inodes.add_entry(CollectionDirectory( + self.inode, self.inodes, self.api, self.num_retries, k)) if e.update(): self._entries[k] = e return True @@ -356,6 +461,7 @@ class MagicDirectory(Directory): else: raise KeyError("No collection with id " + item) + class RecursiveInvalidateDirectory(Directory): def invalidate(self): if self.inode == llfuse.ROOT_INODE: @@ -364,83 +470,118 @@ class RecursiveInvalidateDirectory(Directory): super(RecursiveInvalidateDirectory, self).invalidate() for a in self._entries: self._entries[a].invalidate() - except Exception as e: - _logger.exception(e) + except Exception: + _logger.exception() finally: if self.inode == llfuse.ROOT_INODE: llfuse.lock.release() + class TagsDirectory(RecursiveInvalidateDirectory): '''A special directory that contains as subdirectories all tags visible to the user.''' - def __init__(self, parent_inode, inodes, api, poll_time=60): + def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60): super(TagsDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api - try: - arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate()) - except: - self._poll = True - self._poll_time = poll_time + self.num_retries = num_retries + self._poll = True + self._poll_time = poll_time def update(self): - tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute() + with llfuse.lock_released: + tags = self.api.links().list( + filters=[['link_class', '=', 'tag']], + select=['name'], distinct=True + ).execute(num_retries=self.num_retries) if "items" in tags: self.merge(tags['items'], - lambda i: i['name'] if 'name' in i else i['uuid'], - lambda a, i: a.tag == i, - lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time)) + lambda i: i['name'], + lambda a, i: a.tag == i['name'], + lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time)) + class TagDirectory(Directory): '''A special directory that contains as subdirectories all collections visible to the user that are tagged with a particular tag. ''' - def __init__(self, parent_inode, inodes, api, tag, poll=False, poll_time=60): + def __init__(self, parent_inode, inodes, api, num_retries, tag, + poll=False, poll_time=60): super(TagDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api + self.num_retries = num_retries self.tag = tag self._poll = poll self._poll_time = poll_time def update(self): - taggedcollections = self.api.links().list(filters=[['link_class', '=', 'tag'], - ['name', '=', self.tag], - ['head_uuid', 'is_a', 'arvados#collection']], - select=['head_uuid']).execute() + with llfuse.lock_released: + taggedcollections = self.api.links().list( + filters=[['link_class', '=', 'tag'], + ['name', '=', self.tag], + ['head_uuid', 'is_a', 'arvados#collection']], + select=['head_uuid'] + ).execute(num_retries=self.num_retries) self.merge(taggedcollections['items'], lambda i: i['head_uuid'], lambda a, i: a.collection_locator == i['head_uuid'], - lambda i: CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])) + lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])) -class ProjectDirectory(RecursiveInvalidateDirectory): +class ProjectDirectory(Directory): '''A special directory that contains the contents of a project.''' - def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60): + def __init__(self, parent_inode, inodes, api, num_retries, project_object, + poll=False, poll_time=60): super(ProjectDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api + self.num_retries = num_retries self.project_object = project_object + self.project_object_file = None self.uuid = project_object['uuid'] + self._poll = poll + self._poll_time = poll_time def createDirectory(self, i): - if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']): - return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid']) - elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']): - return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time) - elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection': - return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid']) - #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']): - # return None - #elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']): - # return ObjectFile(self.parent_inode, i) + if collection_uuid_pattern.match(i['uuid']): + return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i) + elif group_uuid_pattern.match(i['uuid']): + return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time) + elif link_uuid_pattern.match(i['uuid']): + if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']): + return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']) + else: + return None + elif uuid_pattern.match(i['uuid']): + return ObjectFile(self.parent_inode, i) else: return None def update(self): - def same(a, i): + if self.project_object_file == None: + self.project_object_file = ObjectFile(self.inode, self.project_object) + self.inodes.add_entry(self.project_object_file) + + def namefn(i): + if 'name' in i: + if i['name'] is None or len(i['name']) == 0: + return None + elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']): + # collection or subproject + return i['name'] + elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection': + # name link + return i['name'] + elif 'kind' in i and i['kind'].startswith('arvados#'): + # something else + return "{}.{}".format(i['name'], i['kind'][8:]) + else: + return None + + def samefn(a, i): if isinstance(a, CollectionDirectory): return a.collection_locator == i['uuid'] elif isinstance(a, ProjectDirectory): @@ -449,91 +590,110 @@ class ProjectDirectory(RecursiveInvalidateDirectory): return a.uuid == i['uuid'] and not a.stale() return False - if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid): - self.project_object = self.api.groups().get(uuid=self.uuid).execute() - elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid): - self.project_object = self.api.users().get(uuid=self.uuid).execute() + with llfuse.lock_released: + if group_uuid_pattern.match(self.uuid): + self.project_object = self.api.groups().get( + uuid=self.uuid).execute(num_retries=self.num_retries) + elif user_uuid_pattern.match(self.uuid): + self.project_object = self.api.users().get( + uuid=self.uuid).execute(num_retries=self.num_retries) - contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid) + contents = arvados.util.list_all(self.api.groups().contents, + self.num_retries, uuid=self.uuid) + # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use. + contents += arvados.util.list_all( + self.api.links().list, self.num_retries, + filters=[['tail_uuid', '=', self.uuid], + ['link_class', '=', 'name']]) - # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use. - contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']]) + # end with llfuse.lock_released, re-acquire lock self.merge(contents, - lambda i: i['name'] if 'name' in i and i['name'] is not None and len(i['name']) > 0 else i['uuid'], - same, + namefn, + samefn, self.createDirectory) - def ctime(self): - return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0 - - def mtime(self): - return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object else 0 + def __getitem__(self, item): + self.checkupdate() + if item == '.arvados#project': + return self.project_object_file + else: + return super(ProjectDirectory, self).__getitem__(item) + def __contains__(self, k): + if k == '.arvados#project': + return True + else: + return super(ProjectDirectory, self).__contains__(k) -class HomeDirectory(RecursiveInvalidateDirectory): +class SharedDirectory(Directory): '''A special directory that represents users or groups who have shared projects with me.''' - def __init__(self, parent_inode, inodes, api, poll=False, poll_time=60): - super(HomeDirectory, self).__init__(parent_inode) - self.current_user = api.users().current().execute() + def __init__(self, parent_inode, inodes, api, num_retries, exclude, + poll=False, poll_time=60): + super(SharedDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api - - # try: - # arvados.events.subscribe(self.api, [], lambda ev: self.invalidate()) - # except: - # self._poll = True - # self._poll_time = poll_time + self.num_retries = num_retries + self.current_user = api.users().current().execute(num_retries=num_retries) + self._poll = True + self._poll_time = poll_time def update(self): - all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']]) - objects = {} - for ob in all_projects: - objects[ob['uuid']] = ob - - roots = [] - root_owners = {} - for ob in all_projects: - if ob['owner_uuid'] == self.current_user['uuid'] or ob['owner_uuid'] not in objects: - roots.append(ob) - root_owners[ob['owner_uuid']] = True - - lusers = arvados.util.list_all(self.api.users().list, filters=[['uuid','in', list(root_owners)]]) - lgroups = arvados.util.list_all(self.api.groups().list, filters=[['uuid','in', list(root_owners)]]) - - users = {} - groups = {} - - for l in lusers: - objects[l["uuid"]] = l - for l in lgroups: - objects[l["uuid"]] = l - - contents = {} - for r in root_owners: - if r in objects: - obr = objects[r] - if "name" in obr: - contents[obr["name"]] = obr - if "first_name" in obr: - contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr - - for r in roots: - if r['owner_uuid'] not in objects: - contents[r['name']] = r - + with llfuse.lock_released: + all_projects = arvados.util.list_all( + self.api.groups().list, self.num_retries, + filters=[['group_class','=','project']]) + objects = {} + for ob in all_projects: + objects[ob['uuid']] = ob + + roots = [] + root_owners = {} + for ob in all_projects: + if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects: + roots.append(ob) + root_owners[ob['owner_uuid']] = True + + lusers = arvados.util.list_all( + self.api.users().list, self.num_retries, + filters=[['uuid','in', list(root_owners)]]) + lgroups = arvados.util.list_all( + self.api.groups().list, self.num_retries, + filters=[['uuid','in', list(root_owners)]]) + + users = {} + groups = {} + + for l in lusers: + objects[l["uuid"]] = l + for l in lgroups: + objects[l["uuid"]] = l + + contents = {} + for r in root_owners: + if r in objects: + obr = objects[r] + if "name" in obr: + contents[obr["name"]] = obr + if "first_name" in obr: + contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr + + for r in roots: + if r['owner_uuid'] not in objects: + contents[r['name']] = r + + # end with llfuse.lock_released, re-acquire lock + try: self.merge(contents.items(), lambda i: i[0], lambda a, i: a.uuid == i[1]['uuid'], - lambda i: ProjectDirectory(self.inode, self.inodes, self.api, i[1], poll=self._poll, poll_time=self._poll_time)) - except Exception as e: - _logger.exception(e) + lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time)) + except Exception: + _logger.exception() - #def contents(self): - # return self.api.groups().contents(uuid=self.uuid).execute()['items'] class FileHandle(object): '''Connects a numeric file handle to a File or Directory object that has @@ -550,7 +710,7 @@ class Inodes(object): def __init__(self): self._entries = {} - self._counter = llfuse.ROOT_INODE + self._counter = itertools.count(llfuse.ROOT_INODE) def __getitem__(self, item): return self._entries[item] @@ -568,9 +728,8 @@ class Inodes(object): return k in self._entries def add_entry(self, entry): - entry.inode = self._counter + entry.inode = next(self._counter) self._entries[entry.inode] = entry - self._counter += 1 return entry def del_entry(self, entry): @@ -584,14 +743,15 @@ class Operations(llfuse.Operations): llfuse has its own global lock which is acquired before calling a request handler, so request handlers do not run concurrently unless the lock is explicitly released - with llfuse.lock_released.''' + using "with llfuse.lock_released:"''' - def __init__(self, uid, gid): + def __init__(self, uid, gid, encoding="utf-8"): super(Operations, self).__init__() self.inodes = Inodes() self.uid = uid self.gid = gid + self.encoding = encoding # dict of inode to filehandle self._filehandles = {} @@ -624,6 +784,8 @@ class Operations(llfuse.Operations): entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH if isinstance(e, Directory): entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFDIR + elif isinstance(e, StreamReaderFile): + entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFREG else: entry.st_mode |= stat.S_IFREG @@ -634,17 +796,16 @@ class Operations(llfuse.Operations): entry.st_size = e.size() - entry.st_blksize = 1024 - entry.st_blocks = e.size()/1024 - if e.size()/1024 != 0: - entry.st_blocks += 1 - entry.st_atime = 0 - entry.st_mtime = e.mtime() - entry.st_ctime = e.ctime() + entry.st_blksize = 512 + entry.st_blocks = (e.size()/512)+1 + entry.st_atime = int(e.atime()) + entry.st_mtime = int(e.mtime()) + entry.st_ctime = int(e.mtime()) return entry def lookup(self, parent_inode, name): + name = unicode(name, self.encoding) _logger.debug("arv-mount lookup: parent_inode %i name %s", parent_inode, name) inode = None @@ -656,7 +817,7 @@ class Operations(llfuse.Operations): p = self.inodes[parent_inode] if name == '..': inode = p.parent_inode - elif name in p: + elif isinstance(p, Directory) and name in p: inode = p[name].inode if inode != None: @@ -688,10 +849,17 @@ class Operations(llfuse.Operations): else: raise llfuse.FUSEError(errno.EBADF) + # update atime + handle.entry._atime = time.time() + try: with llfuse.lock_released: return handle.entry.readfrom(off, size) - except: + except arvados.errors.NotFoundError as e: + _logger.warning("Block not found: " + str(e)) + raise llfuse.FUSEError(errno.EIO) + except Exception: + _logger.exception() raise llfuse.FUSEError(errno.EIO) def release(self, fh): @@ -716,6 +884,9 @@ class Operations(llfuse.Operations): else: raise llfuse.FUSEError(errno.EIO) + # update atime + p._atime = time.time() + self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items())) return fh @@ -732,7 +903,10 @@ class Operations(llfuse.Operations): e = off while e < len(handle.entry): if handle.entry[e][1].inode in self.inodes: - yield (handle.entry[e][0], self.getattr(handle.entry[e][1].inode), e+1) + try: + yield (handle.entry[e][0].encode(self.encoding), self.getattr(handle.entry[e][1].inode), e+1) + except UnicodeEncodeError: + pass e += 1 def releasedir(self, fh): @@ -740,7 +914,7 @@ class Operations(llfuse.Operations): def statfs(self): st = llfuse.StatvfsData() - st.f_bsize = 1024 * 1024 + st.f_bsize = 64 * 1024 st.f_blocks = 0 st.f_files = 0