X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0576030b3181b72f8395c73e0f3562582b59c2aa..c51634cad3c68d0a8400f1b1c47da9eef5307e06:/services/fuse/arvados_fuse/__init__.py diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index f026d4e25c..f49b94777b 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -19,6 +19,7 @@ import logging import time import calendar import threading +from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern _logger = logging.getLogger('arvados.arvados_fuse') @@ -33,12 +34,18 @@ class SafeApi(object): self.token = config.get('ARVADOS_API_TOKEN') self.insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE') self.local = threading.local() + self.block_cache = arvados.KeepBlockCache() def localapi(self): if 'api' not in self.local.__dict__: self.local.api = arvados.api('v1', False, self.host, self.token, self.insecure) return self.local.api + def localkeep(self): + if 'keep' not in self.local.__dict__: + self.local.keep = arvados.KeepClient(api_client=self.localapi(), block_cache=self.block_cache) + return self.local.keep + def collections(self): return self.localapi().collections() @@ -50,10 +57,13 @@ class SafeApi(object): def users(self): return self.localapi().users() - + def convertTime(t): '''Parse Arvados timestamp to unix time.''' - return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ")) + try: + return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ")) + except (TypeError, ValueError): + return 0 def sanitize_filename(dirty): '''Remove troublesome characters from filenames.''' @@ -82,39 +92,35 @@ class FreshBase(object): self._stale = True self._poll = False self._last_update = time.time() + self._atime = time.time() self._poll_time = 60 # Mark the value as stale def invalidate(self): self._stale = True - # Test if the entries dict is stale + # Test if the entries dict is stale. def stale(self): if self._stale: return True if self._poll: - return (self._last_update + self._poll_time) < time.time() + return (self._last_update + self._poll_time) < self._atime return False def fresh(self): self._stale = False self._last_update = time.time() - def ctime(self): - return 0 - - def mtime(self): - return 0 - + def atime(self): + return self._atime class File(FreshBase): '''Base for file objects.''' - def __init__(self, parent_inode, _ctime=0, _mtime=0): + def __init__(self, parent_inode, _mtime=0): super(File, self).__init__() self.inode = None self.parent_inode = parent_inode - self._ctime = _ctime self._mtime = _mtime def size(self): @@ -123,9 +129,6 @@ class File(FreshBase): def readfrom(self, off, size): return '' - def ctime(self): - return self._ctime - def mtime(self): return self._mtime @@ -133,8 +136,8 @@ class File(FreshBase): class StreamReaderFile(File): '''Wraps a StreamFileReader as a file.''' - def __init__(self, parent_inode, reader, _ctime, _mtime): - super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime) + def __init__(self, parent_inode, reader, _mtime): + super(StreamReaderFile, self).__init__(parent_inode, _mtime) self.reader = reader def size(self): @@ -149,27 +152,26 @@ class StreamReaderFile(File): class StringFile(File): '''Wrap a simple string as a file''' - def __init__(self, parent_inode, contents, _ctime, _mtime): - super(StringFile, self).__init__(parent_inode, _ctime, _mtime) + def __init__(self, parent_inode, contents, _mtime): + super(StringFile, self).__init__(parent_inode, _mtime) self.contents = contents def size(self): return len(self.contents) def readfrom(self, off, size): - return self.contents[off:(off+size)] + return self.contents[off:(off+size)] class ObjectFile(StringFile): '''Wrap a dict as a serialized json object.''' def __init__(self, parent_inode, obj): - super(ObjectFile, self).__init__(parent_inode, "", 0, 0) + super(ObjectFile, self).__init__(parent_inode, "", 0) self.uuid = obj['uuid'] self.update(obj) def update(self, obj): - self._ctime = convertTime(obj['created_at']) if 'created_at' in obj else 0 self._mtime = convertTime(obj['modified_at']) if 'modified_at' in obj else 0 self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n" @@ -189,6 +191,7 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self._entries = {} + self._mtime = time.time() # Overriden by subclasses to implement logic to update the entries dict # when the directory is stale @@ -245,6 +248,7 @@ class Directory(FreshBase): oldentries = self._entries self._entries = {} + changed = False for i in items: name = sanitize_filename(fn(i)) if name: @@ -257,11 +261,17 @@ class Directory(FreshBase): ent = new_entry(i) if ent is not None: self._entries[name] = self.inodes.add_entry(ent) + changed = True # delete any other directory entries that were not in found in 'items' - for i in oldentries: + for i in oldentries: llfuse.invalidate_entry(self.inode, str(i)) self.inodes.del_entry(oldentries[i]) + changed = True + + if changed: + self._mtime = time.time() + self.fresh() def clear(self): @@ -275,6 +285,9 @@ class Directory(FreshBase): self.inodes.del_entry(oldentries[n]) self.invalidate() + def mtime(self): + return self._mtime + class CollectionDirectory(Directory): '''Represents the root of a directory tree holding a collection.''' @@ -300,7 +313,7 @@ class CollectionDirectory(Directory): self.collection_object_file.update(self.collection_object) self.clear() - collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api) + collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api, self.api.localkeep()) for s in collection.all_streams(): cwd = self for part in s.name().split('/'): @@ -310,11 +323,11 @@ class CollectionDirectory(Directory): cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode)) cwd = cwd._entries[partname] for k, v in s.files().items(): - cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime())) + cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime())) def update(self): try: - if self.collection_object is not None and re.match(r'^[a-f0-9]{32}', self.collection_locator): + if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator): return True with llfuse.lock_released: @@ -334,11 +347,15 @@ class CollectionDirectory(Directory): else: _logger.error("arv-mount %s: error", self.collection_locator) _logger.exception(detail) + except arvados.errors.ArgumentError as detail: + _logger.warning("arv-mount %s: error %s", self.collection_locator, detail) + if self.collection_object is not None and "manifest_text" in self.collection_object: + _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) except Exception as detail: _logger.error("arv-mount %s: error", self.collection_locator) - if "manifest_text" in self.collection_object: + if self.collection_object is not None and "manifest_text" in self.collection_object: _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) - _logger.exception(detail) + _logger.exception(detail) return False def __getitem__(self, item): @@ -357,13 +374,9 @@ class CollectionDirectory(Directory): else: return super(CollectionDirectory, self).__contains__(k) - def ctime(self): - self.checkupdate() - return convertTime(self.collection_object["created_at"]) if self.collection_object is not None else 0 - def mtime(self): self.checkupdate() - return convertTime(self.collection_object["modified_at"]) if self.collection_object is not None else 0 + return convertTime(self.collection_object["modified_at"]) if self.collection_object is not None and 'modified_at' in self.collection_object else 0 class MagicDirectory(Directory): @@ -380,10 +393,33 @@ class MagicDirectory(Directory): super(MagicDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api + # Have to defer creating readme_file because at this point we don't + # yet have an inode assigned. + self.readme_file = None + + def create_readme(self): + if self.readme_file is None: + text = '''This directory provides access to Arvados collections as subdirectories listed +by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in +the form '1234567890abcdefghijklmnopqrstuv+123'). + +Note that this directory will appear empty until you attempt to access a +specific collection subdirectory (such as trying to 'cd' into it), at which +point the collection will actually be looked up on the server and the directory +will appear if it exists. +''' + self.readme_file = self.inodes.add_entry(StringFile(self.inode, text, time.time())) + self._entries["README"] = self.readme_file def __contains__(self, k): + self.create_readme() + if k in self._entries: return True + + if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k): + return False + try: e = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, self.api, k)) if e.update(): @@ -395,6 +431,10 @@ class MagicDirectory(Directory): _logger.debug('arv-mount exception keep %s', e) return False + def items(self): + self.create_readme() + return self._entries.items() + def __getitem__(self, item): if item in self: return self._entries[item] @@ -424,9 +464,6 @@ class TagsDirectory(RecursiveInvalidateDirectory): super(TagsDirectory, self).__init__(parent_inode) self.inodes = inodes self.api = api - #try: - # arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate()) - #except: self._poll = True self._poll_time = poll_time @@ -465,7 +502,7 @@ class TagDirectory(Directory): lambda i: CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])) -class ProjectDirectory(RecursiveInvalidateDirectory): +class ProjectDirectory(Directory): '''A special directory that contains the contents of a project.''' def __init__(self, parent_inode, inodes, api, project_object, poll=False, poll_time=60): @@ -473,41 +510,42 @@ class ProjectDirectory(RecursiveInvalidateDirectory): self.inodes = inodes self.api = api self.project_object = project_object - self.project_object_file = ObjectFile(self.inode, self.project_object) - self.inodes.add_entry(self.project_object_file) + self.project_object_file = None self.uuid = project_object['uuid'] def createDirectory(self, i): - if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']): + if collection_uuid_pattern.match(i['uuid']): return CollectionDirectory(self.inode, self.inodes, self.api, i) - elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']): + elif group_uuid_pattern.match(i['uuid']): return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time) - elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']): - if i['head_kind'] == 'arvados#collection' or re.match('[0-9a-f]{32}\+\d+', i['head_uuid']): + elif link_uuid_pattern.match(i['uuid']): + if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']): return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid']) else: return None - #elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']): - # return None - elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']): + elif uuid_pattern.match(i['uuid']): return ObjectFile(self.parent_inode, i) else: return None def update(self): + if self.project_object_file == None: + self.project_object_file = ObjectFile(self.inode, self.project_object) + self.inodes.add_entry(self.project_object_file) + def namefn(i): if 'name' in i: if i['name'] is None or len(i['name']) == 0: return None - elif re.match(r'[a-z0-9]{5}-(4zz18|j7d0g)-[a-z0-9]{15}', i['uuid']): + elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']): # collection or subproject return i['name'] - elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection': + elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection': # name link return i['name'] elif 'kind' in i and i['kind'].startswith('arvados#'): # something else - return "{}.{}".format(i['name'], i['kind'][8:]) + return "{}.{}".format(i['name'], i['kind'][8:]) else: return None @@ -521,15 +559,15 @@ class ProjectDirectory(RecursiveInvalidateDirectory): return False with llfuse.lock_released: - if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid): + if group_uuid_pattern.match(self.uuid): self.project_object = self.api.groups().get(uuid=self.uuid).execute() - elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid): + elif user_uuid_pattern.match(self.uuid): self.project_object = self.api.users().get(uuid=self.uuid).execute() contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid) # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use. contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']]) - + # end with llfuse.lock_released, re-acquire lock self.merge(contents, @@ -550,14 +588,8 @@ class ProjectDirectory(RecursiveInvalidateDirectory): else: return super(ProjectDirectory, self).__contains__(k) - def ctime(self): - return convertTime(self.project_object["created_at"]) if "created_at" in self.project_object else 0 - def mtime(self): - return convertTime(self.project_object["modified_at"]) if "modified_at" in self.project_object else 0 - - -class SharedDirectory(RecursiveInvalidateDirectory): +class SharedDirectory(Directory): '''A special directory that represents users or groups who have shared projects with me.''' def __init__(self, parent_inode, inodes, api, exclude, poll=False, poll_time=60): @@ -565,10 +597,6 @@ class SharedDirectory(RecursiveInvalidateDirectory): self.current_user = api.users().current().execute() self.inodes = inodes self.api = api - - # try: - # arvados.events.subscribe(self.api, [], lambda ev: self.invalidate()) - # except: self._poll = True self._poll_time = poll_time @@ -721,12 +749,10 @@ class Operations(llfuse.Operations): entry.st_size = e.size() entry.st_blksize = 512 - entry.st_blocks = (e.size()/512) - if e.size()/512 != 0: - entry.st_blocks += 1 - entry.st_atime = 0 - entry.st_mtime = e.mtime() - entry.st_ctime = e.ctime() + entry.st_blocks = (e.size()/512)+1 + entry.st_atime = int(e.atime()) + entry.st_mtime = int(e.mtime()) + entry.st_ctime = int(e.mtime()) return entry @@ -742,7 +768,7 @@ class Operations(llfuse.Operations): p = self.inodes[parent_inode] if name == '..': inode = p.parent_inode - elif name in p: + elif isinstance(p, Directory) and name in p: inode = p[name].inode if inode != None: @@ -774,10 +800,17 @@ class Operations(llfuse.Operations): else: raise llfuse.FUSEError(errno.EBADF) + # update atime + handle.entry._atime = time.time() + try: with llfuse.lock_released: return handle.entry.readfrom(off, size) - except: + except arvados.errors.NotFoundError as e: + _logger.warning("Block not found: " + str(e)) + raise llfuse.FUSEError(errno.EIO) + except Exception as e: + _logger.exception(e) raise llfuse.FUSEError(errno.EIO) def release(self, fh): @@ -802,6 +835,9 @@ class Operations(llfuse.Operations): else: raise llfuse.FUSEError(errno.EIO) + # update atime + p._atime = time.time() + self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items())) return fh