X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1cea19230b490876f99194ea4d84970d6330ae12..ae85b0b33bcae6e36dcfa66d2fca9c70583c54b6:/services/fuse/arvados_fuse/fusedir.py?ds=sidebyside diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 2757091aa4..4de9c0b498 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -5,10 +5,14 @@ import llfuse import arvados import apiclient import functools +import threading +from apiclient import errors as apiclient_errors +import errno -from fusefile import StringFile, StreamReaderFile, ObjectFile +from fusefile import StringFile, ObjectFile, FuseArvadosFile from fresh import FreshBase, convertTime, use_counter +import arvados.collection from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern _logger = logging.getLogger('arvados.arvados_fuse') @@ -71,7 +75,7 @@ class Directory(FreshBase): try: self.update() except apiclient.errors.HttpError as e: - _logger.debug(e) + _logger.warn(e) @use_counter def __getitem__(self, item): @@ -88,6 +92,11 @@ class Directory(FreshBase): self.checkupdate() return k in self._entries + @use_counter + def __len__(self): + self.checkupdate() + return len(self._entries) + def fresh(self): self.inodes.touch(self) super(Directory, self).fresh() @@ -124,6 +133,7 @@ class Directory(FreshBase): self._entries[name] = oldentries[name] del oldentries[name] else: + _logger.debug("Adding entry '%s' to inode %i", name, self.inode) # create new directory entry ent = new_entry(i) if ent is not None: @@ -132,11 +142,13 @@ class Directory(FreshBase): # delete any other directory entries that were not in found in 'items' for i in oldentries: + _logger.debug("Forgetting about entry '%s' on inode %i", str(i), self.inode) llfuse.invalidate_entry(self.inode, str(i)) self.inodes.del_entry(oldentries[i]) changed = True if changed: + llfuse.invalidate_inode(self.inode) self._mtime = time.time() self.fresh() @@ -163,27 +175,129 @@ class Directory(FreshBase): def mtime(self): return self._mtime + def writable(self): + return False + + def flush(self): + pass + + def create(self, name): + raise NotImplementedError() + + def mkdir(self, name): + raise NotImplementedError() + + def unlink(self, name): + raise NotImplementedError() + + def rmdir(self, name): + raise NotImplementedError() + + def rename(self, name_old, name_new, src): + raise NotImplementedError() + +class CollectionDirectoryBase(Directory): + def __init__(self, parent_inode, inodes, collection): + super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) + self.collection = collection + + def new_entry(self, name, item, mtime): + name = sanitize_filename(name) + if hasattr(item, "fuse_entry") and item.fuse_entry is not None: + if item.fuse_entry.dead is not True: + raise Exception("Can only reparent dead inode entry") + if item.fuse_entry.inode is None: + raise Exception("Reparented entry must still have valid inode") + item.fuse_entry.dead = False + self._entries[name] = item.fuse_entry + elif isinstance(item, arvados.collection.RichCollectionBase): + self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item)) + self._entries[name].populate(mtime) + else: + self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime)) + item.fuse_entry = self._entries[name] + + def on_event(self, event, collection, name, item): + if collection == self.collection: + _logger.debug("%s %s %s %s", event, collection, name, item) + with llfuse.lock: + if event == arvados.collection.ADD: + self.new_entry(name, item, self.mtime()) + elif event == arvados.collection.DEL: + ent = self._entries[name] + del self._entries[name] + llfuse.invalidate_entry(self.inode, name) + self.inodes.del_entry(ent) + elif event == arvados.collection.MOD: + if hasattr(item, "fuse_entry") and item.fuse_entry is not None: + llfuse.invalidate_inode(item.fuse_entry.inode) + elif name in self._entries: + llfuse.invalidate_inode(self._entries[name].inode) + + def populate(self, mtime): + self._mtime = mtime + self.collection.subscribe(self.on_event) + for entry, item in self.collection.items(): + self.new_entry(entry, item, self.mtime()) + + def writable(self): + return self.collection.writable() + + def flush(self): + with llfuse.lock_released: + self.collection.root_collection().save() + + def create(self, name): + with llfuse.lock_released: + self.collection.open(name, "w").close() + + def mkdir(self, name): + with llfuse.lock_released: + self.collection.mkdirs(name) + + def unlink(self, name): + with llfuse.lock_released: + self.collection.remove(name) + + def rmdir(self, name): + with llfuse.lock_released: + self.collection.remove(name) + + def rename(self, name_old, name_new, src): + with llfuse.lock_released: + if not isinstance(src, CollectionDirectoryBase): + raise llfuse.FUSEError(errno.EPERM) + self.collection.rename(name_old, name_new, source_collection=src.collection, overwrite=True) + self.flush() + src.flush() + -class CollectionDirectory(Directory): +class CollectionDirectory(CollectionDirectoryBase): """Represents the root of a directory tree holding a collection.""" - def __init__(self, parent_inode, inodes, api, num_retries, collection): - super(CollectionDirectory, self).__init__(parent_inode, inodes) + def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): + super(CollectionDirectory, self).__init__(parent_inode, inodes, None) self.api = api self.num_retries = num_retries - self.collection_object_file = None - self.collection_object = None - if isinstance(collection, dict): - self.collection_locator = collection['uuid'] - self._mtime = convertTime(collection.get('modified_at')) + self.collection_record_file = None + self.collection_record = None + if isinstance(collection_record, dict): + self.collection_locator = collection_record['uuid'] + self._mtime = convertTime(collection_record.get('modified_at')) else: - self.collection_locator = collection + self.collection_locator = collection_record self._mtime = 0 self._manifest_size = 0 + if self.collection_locator: + self._writable = (uuid_pattern.match(self.collection_locator) is not None) + self._updating_lock = threading.Lock() def same(self, i): return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator + def writable(self): + return self.collection.writable() if self.collection is not None else self._writable + # Used by arv-web.py to switch the contents of the CollectionDirectory def change_collection(self, new_locator): """Switch the contents of the CollectionDirectory. @@ -192,82 +306,94 @@ class CollectionDirectory(Directory): """ self.collection_locator = new_locator - self.collection_object = None + self.collection_record = None self.update() - def new_collection(self, new_collection_object, coll_reader): - self.clear(force=True) + def new_collection(self, new_collection_record, coll_reader): + if self.inode: + self.clear(force=True) - self.collection_object = new_collection_object + self.collection_record = new_collection_record - self._mtime = convertTime(self.collection_object.get('modified_at')) + if self.collection_record: + self._mtime = convertTime(self.collection_record.get('modified_at')) + self.collection_locator = self.collection_record["uuid"] + if self.collection_record_file is not None: + self.collection_record_file.update(self.collection_record) - if self.collection_object_file is not None: - self.collection_object_file.update(self.collection_object) + self.collection = coll_reader + self.populate(self.mtime()) - for s in coll_reader.all_streams(): - cwd = self - for part in s.name().split('/'): - if part != '' and part != '.': - partname = sanitize_filename(part) - if partname not in cwd._entries: - cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode, self.inodes)) - cwd = cwd._entries[partname] - for k, v in s.files().items(): - cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime())) + def uuid(self): + return self.collection_locator def update(self): try: - if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator): + if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator): return True if self.collection_locator is None: self.fresh() return True - with llfuse.lock_released: - coll_reader = arvados.CollectionReader( - self.collection_locator, self.api, self.api.keep, - num_retries=self.num_retries) - new_collection_object = coll_reader.api_response() or {} - # If the Collection only exists in Keep, there will be no API - # response. Fill in the fields we need. - if 'uuid' not in new_collection_object: - new_collection_object['uuid'] = self.collection_locator - if "portable_data_hash" not in new_collection_object: - new_collection_object["portable_data_hash"] = new_collection_object["uuid"] - if 'manifest_text' not in new_collection_object: - new_collection_object['manifest_text'] = coll_reader.manifest_text() - coll_reader.normalize() - # end with llfuse.lock_released, re-acquire lock - - if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]: - self.new_collection(new_collection_object, coll_reader) - - self._manifest_size = len(coll_reader.manifest_text()) - _logger.debug("%s manifest_size %i", self, self._manifest_size) + try: + with llfuse.lock_released: + self._updating_lock.acquire() + if not self.stale(): + return + + _logger.debug("Updating %s", self.collection_locator) + if self.collection: + self.collection.update() + else: + if uuid_pattern.match(self.collection_locator): + coll_reader = arvados.collection.Collection( + self.collection_locator, self.api, self.api.keep, + num_retries=self.num_retries) + else: + coll_reader = arvados.collection.CollectionReader( + self.collection_locator, self.api, self.api.keep, + num_retries=self.num_retries) + new_collection_record = coll_reader.api_response() or {} + # If the Collection only exists in Keep, there will be no API + # response. Fill in the fields we need. + if 'uuid' not in new_collection_record: + new_collection_record['uuid'] = self.collection_locator + if "portable_data_hash" not in new_collection_record: + new_collection_record["portable_data_hash"] = new_collection_record["uuid"] + if 'manifest_text' not in new_collection_record: + new_collection_record['manifest_text'] = coll_reader.manifest_text() + + if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"): + self.new_collection(new_collection_record, coll_reader) + + self._manifest_size = len(coll_reader.manifest_text()) + _logger.debug("%s manifest_size %i", self, self._manifest_size) + # end with llfuse.lock_released, re-acquire lock - self.fresh() - return True + self.fresh() + return True + finally: + self._updating_lock.release() except arvados.errors.NotFoundError: _logger.exception("arv-mount %s: error", self.collection_locator) except arvados.errors.ArgumentError as detail: _logger.warning("arv-mount %s: error %s", self.collection_locator, detail) - if self.collection_object is not None and "manifest_text" in self.collection_object: - _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) + if self.collection_record is not None and "manifest_text" in self.collection_record: + _logger.warning("arv-mount manifest_text is: %s", self.collection_record["manifest_text"]) except Exception: _logger.exception("arv-mount %s: error", self.collection_locator) - if self.collection_object is not None and "manifest_text" in self.collection_object: - _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"]) + if self.collection_record is not None and "manifest_text" in self.collection_record: + _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"]) return False def __getitem__(self, item): self.checkupdate() if item == '.arvados#collection': - if self.collection_object_file is None: - self.collection_object_file = ObjectFile(self.inode, self.collection_object) - self.inodes.add_entry(self.collection_object_file) - return self.collection_object_file + if self.collection_record_file is None: + self.collection_record_file = ObjectFile(self.inode, self.collection_record) + self.inodes.add_entry(self.collection_record_file) + return self.collection_record_file else: return super(CollectionDirectory, self).__getitem__(item) @@ -278,8 +404,8 @@ class CollectionDirectory(Directory): return super(CollectionDirectory, self).__contains__(k) def invalidate(self): - self.collection_object = None - self.collection_object_file = None + self.collection_record = None + self.collection_record_file = None super(CollectionDirectory, self).invalidate() def persisted(self): @@ -431,9 +557,11 @@ class ProjectDirectory(Directory): self.num_retries = num_retries self.project_object = project_object self.project_object_file = None - self.uuid = project_object['uuid'] + self.project_uuid = project_object['uuid'] self._poll = poll self._poll_time = poll_time + self._updating_lock = threading.Lock() + self._current_user = None def createDirectory(self, i): if collection_uuid_pattern.match(i['uuid']): @@ -450,6 +578,9 @@ class ProjectDirectory(Directory): else: return None + def uuid(self): + return self.project_uuid + def update(self): if self.project_object_file == None: self.project_object_file = ObjectFile(self.inode, self.project_object) @@ -472,31 +603,36 @@ class ProjectDirectory(Directory): return None def samefn(a, i): - if isinstance(a, CollectionDirectory): - return a.collection_locator == i['uuid'] - elif isinstance(a, ProjectDirectory): - return a.uuid == i['uuid'] + if isinstance(a, CollectionDirectory) or isinstance(a, ProjectDirectory): + return a.uuid() == i['uuid'] elif isinstance(a, ObjectFile): - return a.uuid == i['uuid'] and not a.stale() + return a.uuid() == i['uuid'] and not a.stale() return False - with llfuse.lock_released: - if group_uuid_pattern.match(self.uuid): - self.project_object = self.api.groups().get( - uuid=self.uuid).execute(num_retries=self.num_retries) - elif user_uuid_pattern.match(self.uuid): - self.project_object = self.api.users().get( - uuid=self.uuid).execute(num_retries=self.num_retries) + try: + with llfuse.lock_released: + self._updating_lock.acquire() + if not self.stale(): + return - contents = arvados.util.list_all(self.api.groups().contents, - self.num_retries, uuid=self.uuid) + if group_uuid_pattern.match(self.project_uuid): + self.project_object = self.api.groups().get( + uuid=self.project_uuid).execute(num_retries=self.num_retries) + elif user_uuid_pattern.match(self.project_uuid): + self.project_object = self.api.users().get( + uuid=self.project_uuid).execute(num_retries=self.num_retries) - # end with llfuse.lock_released, re-acquire lock + contents = arvados.util.list_all(self.api.groups().contents, + self.num_retries, uuid=self.project_uuid) - self.merge(contents, - namefn, - samefn, - self.createDirectory) + # end with llfuse.lock_released, re-acquire lock + + self.merge(contents, + namefn, + samefn, + self.createDirectory) + finally: + self._updating_lock.release() def __getitem__(self, item): self.checkupdate() @@ -511,8 +647,39 @@ class ProjectDirectory(Directory): else: return super(ProjectDirectory, self).__contains__(k) + def writable(self): + with llfuse.lock_released: + if not self._current_user: + self._current_user = self.api.users().current().execute(num_retries=self.num_retries) + return self._current_user["uuid"] in self.project_object["writable_by"] + def persisted(self): - return False + return True + + def mkdir(self, name): + try: + with llfuse.lock_released: + new_collection = self.api.collections().create(body={"owner_uuid": self.project_uuid, + "name": name, + "manifest_text": ""}).execute(num_retries=self.num_retries) + self.invalidate() + except apiclient_errors.Error as error: + _logger.error(error) + raise llfuse.FUSEError(errno.EEXIST) + + def rmdir(self, name): + if name not in self: + raise llfuse.FUSEError(errno.ENOENT) + if not isinstance(self[name], CollectionDirectory): + raise llfuse.FUSEError(errno.EPERM) + if len(self[name]) > 0: + raise llfuse.FUSEError(errno.ENOTEMPTY) + with llfuse.lock_released: + self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries) + self.invalidate() + + def rename(self, name_old, name_new, src): + raise NotImplementedError() class SharedDirectory(Directory): @@ -562,11 +729,14 @@ class SharedDirectory(Directory): for r in root_owners: if r in objects: obr = objects[r] - if "name" in obr: + if obr.get("name"): contents[obr["name"]] = obr - if "first_name" in obr: + #elif obr.get("username"): + # contents[obr["username"]] = obr + elif "first_name" in obr: contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr + for r in roots: if r['owner_uuid'] not in objects: contents[r['name']] = r @@ -576,7 +746,7 @@ class SharedDirectory(Directory): try: self.merge(contents.items(), lambda i: i[0], - lambda a, i: a.uuid == i[1]['uuid'], + lambda a, i: a.uuid() == i[1]['uuid'], lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time)) except Exception: _logger.exception()