import arvados
import apiclient
import functools
+import threading
+from apiclient import errors as apiclient_errors
+import errno
-from fusefile import StringFile, StreamReaderFile, ObjectFile
-from fresh import FreshBase, convertTime, use_counter
+from fusefile import StringFile, ObjectFile, FuseArvadosFile
+from fresh import FreshBase, convertTime, use_counter, check_update
+import arvados.collection
from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
_logger = logging.getLogger('arvados.arvados_fuse')
"""
def __init__(self, parent_inode, inodes):
+ """parent_inode is the integer inode number"""
+
super(Directory, self).__init__()
- """parent_inode is the integer inode number"""
self.inode = None
if not isinstance(parent_inode, int):
raise Exception("parent_inode should be an int")
try:
self.update()
except apiclient.errors.HttpError as e:
- _logger.debug(e)
+ _logger.warn(e)
@use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
return self._entries[item]
@use_counter
+ @check_update
def items(self):
- self.checkupdate()
return list(self._entries.items())
@use_counter
+ @check_update
def __contains__(self, k):
- self.checkupdate()
return k in self._entries
+ @use_counter
+ @check_update
+ def __len__(self):
+ return len(self._entries)
+
def fresh(self):
self.inodes.touch(self)
super(Directory, self).fresh()
self._entries[name] = oldentries[name]
del oldentries[name]
else:
+ _logger.debug("Adding entry '%s' to inode %i", name, self.inode)
# create new directory entry
ent = new_entry(i)
if ent is not None:
# delete any other directory entries that were not in found in 'items'
for i in oldentries:
- llfuse.invalidate_entry(self.inode, str(i))
+ _logger.debug("Forgetting about entry '%s' on inode %i", i, self.inode)
+ self.inodes.invalidate_entry(self.inode, i.encode(self.inodes.encoding))
self.inodes.del_entry(oldentries[i])
changed = True
if changed:
+ self.inodes.invalidate_inode(self.inode)
self._mtime = time.time()
self.fresh()
self._entries = oldentries
return False
for n in oldentries:
- llfuse.invalidate_entry(self.inode, str(n))
+ self.inodes.invalidate_entry(self.inode, n.encode(self.inodes.encoding))
self.inodes.del_entry(oldentries[n])
- llfuse.invalidate_inode(self.inode)
+ self.inodes.invalidate_inode(self.inode)
self.invalidate()
return True
else:
def mtime(self):
return self._mtime
+ def writable(self):
+ return False
+
+ def flush(self):
+ pass
+
+ def create(self, name):
+ raise NotImplementedError()
+
+ def mkdir(self, name):
+ raise NotImplementedError()
+
+ def unlink(self, name):
+ raise NotImplementedError()
+
+ def rmdir(self, name):
+ raise NotImplementedError()
+
+ def rename(self, name_old, name_new, src):
+ raise NotImplementedError()
+
+
+class CollectionDirectoryBase(Directory):
+ """Represent an Arvados Collection as a directory.
+
+ This class is used for Subcollections, and is also the base class for
+ CollectionDirectory, which implements collection loading/saving on
+ Collection records.
+
+ Most operations act only the underlying Arvados `Collection` object. The
+ `Collection` object signals via a notify callback to
+ `CollectionDirectoryBase.on_event` that an item was added, removed or
+ modified. FUSE inodes and directory entries are created, deleted or
+ invalidated in response to these events.
+
+ """
+
+ def __init__(self, parent_inode, inodes, collection):
+ super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+ self.collection = collection
+
+ def new_entry(self, name, item, mtime):
+ name = sanitize_filename(name)
+ if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+ if item.fuse_entry.dead is not True:
+ raise Exception("Can only reparent dead inode entry")
+ if item.fuse_entry.inode is None:
+ raise Exception("Reparented entry must still have valid inode")
+ item.fuse_entry.dead = False
+ self._entries[name] = item.fuse_entry
+ elif isinstance(item, arvados.collection.RichCollectionBase):
+ self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+ self._entries[name].populate(mtime)
+ else:
+ self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
+ item.fuse_entry = self._entries[name]
+
+ def on_event(self, event, collection, name, item):
+ if collection == self.collection:
+ name = sanitize_filename(name)
+ _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
+ with llfuse.lock:
+ if event == arvados.collection.ADD:
+ self.new_entry(name, item, self.mtime())
+ elif event == arvados.collection.DEL:
+ ent = self._entries[name]
+ del self._entries[name]
+ self.inodes.invalidate_entry(self.inode, name.encode(self.inodes.encoding))
+ self.inodes.del_entry(ent)
+ elif event == arvados.collection.MOD:
+ if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+ self.inodes.invalidate_inode(item.fuse_entry.inode)
+ elif name in self._entries:
+ self.inodes.invalidate_inode(self._entries[name].inode)
+
+ def populate(self, mtime):
+ self._mtime = mtime
+ self.collection.subscribe(self.on_event)
+ for entry, item in self.collection.items():
+ self.new_entry(entry, item, self.mtime())
+
+ def writable(self):
+ return self.collection.writable()
+
+ @use_counter
+ def flush(self):
+ with llfuse.lock_released:
+ self.collection.root_collection().save()
+
+ @use_counter
+ @check_update
+ def create(self, name):
+ with llfuse.lock_released:
+ self.collection.open(name, "w").close()
+
+ @use_counter
+ @check_update
+ def mkdir(self, name):
+ with llfuse.lock_released:
+ self.collection.mkdirs(name)
+
+ @use_counter
+ @check_update
+ def unlink(self, name):
+ with llfuse.lock_released:
+ self.collection.remove(name)
+ self.flush()
-class CollectionDirectory(Directory):
- """Represents the root of a directory tree holding a collection."""
+ @use_counter
+ @check_update
+ def rmdir(self, name):
+ with llfuse.lock_released:
+ self.collection.remove(name)
+ self.flush()
+
+ @use_counter
+ @check_update
+ def rename(self, name_old, name_new, src):
+ if not isinstance(src, CollectionDirectoryBase):
+ raise llfuse.FUSEError(errno.EPERM)
+
+ if name_new in self:
+ ent = src[name_old]
+ tgt = self[name_new]
+ if isinstance(ent, FuseArvadosFile) and isinstance(tgt, FuseArvadosFile):
+ pass
+ elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, CollectionDirectoryBase):
+ if len(tgt) > 0:
+ raise llfuse.FUSEError(errno.ENOTEMPTY)
+ elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, FuseArvadosFile):
+ raise llfuse.FUSEError(errno.ENOTDIR)
+ elif isinstance(ent, FuseArvadosFile) and isinstance(tgt, CollectionDirectoryBase):
+ raise llfuse.FUSEError(errno.EISDIR)
+
+ with llfuse.lock_released:
+ self.collection.rename(name_old, name_new, source_collection=src.collection, overwrite=True)
+ self.flush()
+ src.flush()
- def __init__(self, parent_inode, inodes, api, num_retries, collection):
- super(CollectionDirectory, self).__init__(parent_inode, inodes)
+
+class CollectionDirectory(CollectionDirectoryBase):
+ """Represents the root of a directory tree representing a collection."""
+
+ def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
+ super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
self.api = api
self.num_retries = num_retries
- self.collection_object_file = None
- self.collection_object = None
- if isinstance(collection, dict):
- self.collection_locator = collection['uuid']
- self._mtime = convertTime(collection.get('modified_at'))
+ self.collection_record_file = None
+ self.collection_record = None
+ if isinstance(collection_record, dict):
+ self.collection_locator = collection_record['uuid']
+ self._mtime = convertTime(collection_record.get('modified_at'))
else:
- self.collection_locator = collection
+ self.collection_locator = collection_record
self._mtime = 0
self._manifest_size = 0
+ if self.collection_locator:
+ self._writable = (uuid_pattern.match(self.collection_locator) is not None)
+ self._updating_lock = threading.Lock()
def same(self, i):
return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
+ def writable(self):
+ return self.collection.writable() if self.collection is not None else self._writable
+
# Used by arv-web.py to switch the contents of the CollectionDirectory
def change_collection(self, new_locator):
"""Switch the contents of the CollectionDirectory.
"""
self.collection_locator = new_locator
- self.collection_object = None
+ self.collection_record = None
self.update()
- def new_collection(self, new_collection_object, coll_reader):
- self.clear(force=True)
+ def new_collection(self, new_collection_record, coll_reader):
+ if self.inode:
+ self.clear(force=True)
- self.collection_object = new_collection_object
+ self.collection_record = new_collection_record
- self._mtime = convertTime(self.collection_object.get('modified_at'))
+ if self.collection_record:
+ self._mtime = convertTime(self.collection_record.get('modified_at'))
+ self.collection_locator = self.collection_record["uuid"]
+ if self.collection_record_file is not None:
+ self.collection_record_file.update(self.collection_record)
- if self.collection_object_file is not None:
- self.collection_object_file.update(self.collection_object)
+ self.collection = coll_reader
+ self.populate(self.mtime())
- for s in coll_reader.all_streams():
- cwd = self
- for part in s.name().split('/'):
- if part != '' and part != '.':
- partname = sanitize_filename(part)
- if partname not in cwd._entries:
- cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode, self.inodes))
- cwd = cwd._entries[partname]
- for k, v in s.files().items():
- cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))
+ def uuid(self):
+ return self.collection_locator
- def update(self):
+ @use_counter
+ def update(self, to_record_version=None):
try:
- if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator):
+ if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
return True
if self.collection_locator is None:
self.fresh()
return True
- with llfuse.lock_released:
- coll_reader = arvados.CollectionReader(
- self.collection_locator, self.api, self.api.keep,
- num_retries=self.num_retries)
- new_collection_object = coll_reader.api_response() or {}
- # If the Collection only exists in Keep, there will be no API
- # response. Fill in the fields we need.
- if 'uuid' not in new_collection_object:
- new_collection_object['uuid'] = self.collection_locator
- if "portable_data_hash" not in new_collection_object:
- new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
- if 'manifest_text' not in new_collection_object:
- new_collection_object['manifest_text'] = coll_reader.manifest_text()
- coll_reader.normalize()
- # end with llfuse.lock_released, re-acquire lock
-
- if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
- self.new_collection(new_collection_object, coll_reader)
-
- self._manifest_size = len(coll_reader.manifest_text())
- _logger.debug("%s manifest_size %i", self, self._manifest_size)
+ try:
+ with llfuse.lock_released:
+ self._updating_lock.acquire()
+ if not self.stale():
+ return
+
+ _logger.debug("Updating %s", to_record_version)
+ if self.collection is not None:
+ if self.collection.known_past_version(to_record_version):
+ _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
+ else:
+ self.collection.update()
+ else:
+ if uuid_pattern.match(self.collection_locator):
+ coll_reader = arvados.collection.Collection(
+ self.collection_locator, self.api, self.api.keep,
+ num_retries=self.num_retries)
+ else:
+ coll_reader = arvados.collection.CollectionReader(
+ self.collection_locator, self.api, self.api.keep,
+ num_retries=self.num_retries)
+ new_collection_record = coll_reader.api_response() or {}
+ # If the Collection only exists in Keep, there will be no API
+ # response. Fill in the fields we need.
+ if 'uuid' not in new_collection_record:
+ new_collection_record['uuid'] = self.collection_locator
+ if "portable_data_hash" not in new_collection_record:
+ new_collection_record["portable_data_hash"] = new_collection_record["uuid"]
+ if 'manifest_text' not in new_collection_record:
+ new_collection_record['manifest_text'] = coll_reader.manifest_text()
+
+ if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
+ self.new_collection(new_collection_record, coll_reader)
+
+ self._manifest_size = len(coll_reader.manifest_text())
+ _logger.debug("%s manifest_size %i", self, self._manifest_size)
+ # end with llfuse.lock_released, re-acquire lock
- self.fresh()
- return True
+ self.fresh()
+ return True
+ finally:
+ self._updating_lock.release()
except arvados.errors.NotFoundError:
_logger.exception("arv-mount %s: error", self.collection_locator)
except arvados.errors.ArgumentError as detail:
_logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
- if self.collection_object is not None and "manifest_text" in self.collection_object:
- _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
+ if self.collection_record is not None and "manifest_text" in self.collection_record:
+ _logger.warning("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
except Exception:
_logger.exception("arv-mount %s: error", self.collection_locator)
- if self.collection_object is not None and "manifest_text" in self.collection_object:
- _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
+ if self.collection_record is not None and "manifest_text" in self.collection_record:
+ _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
return False
+ @use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
if item == '.arvados#collection':
- if self.collection_object_file is None:
- self.collection_object_file = ObjectFile(self.inode, self.collection_object)
- self.inodes.add_entry(self.collection_object_file)
- return self.collection_object_file
+ if self.collection_record_file is None:
+ self.collection_record_file = ObjectFile(self.inode, self.collection_record)
+ self.inodes.add_entry(self.collection_record_file)
+ return self.collection_record_file
else:
return super(CollectionDirectory, self).__getitem__(item)
return super(CollectionDirectory, self).__contains__(k)
def invalidate(self):
- self.collection_object = None
- self.collection_object_file = None
+ self.collection_record = None
+ self.collection_record_file = None
super(CollectionDirectory, self).invalidate()
def persisted(self):
# footprint directly would be more accurate, but also more complicated.
return self._manifest_size * 128
+ def finalize(self):
+ if self.collection is not None:
+ if self.writable():
+ self.collection.save()
+ self.collection.stop_threads()
+
+
class MagicDirectory(Directory):
"""A special directory that logically contains the set of all extant keep locators.
self._poll = True
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
tags = self.api.links().list(
self._poll = poll
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
taggedcollections = self.api.links().list(
self.num_retries = num_retries
self.project_object = project_object
self.project_object_file = None
- self.uuid = project_object['uuid']
+ self.project_uuid = project_object['uuid']
self._poll = poll
self._poll_time = poll_time
+ self._updating_lock = threading.Lock()
+ self._current_user = None
def createDirectory(self, i):
if collection_uuid_pattern.match(i['uuid']):
else:
return None
+ def uuid(self):
+ return self.project_uuid
+
+ @use_counter
def update(self):
if self.project_object_file == None:
self.project_object_file = ObjectFile(self.inode, self.project_object)
return None
def samefn(a, i):
- if isinstance(a, CollectionDirectory):
- return a.collection_locator == i['uuid']
- elif isinstance(a, ProjectDirectory):
- return a.uuid == i['uuid']
+ if isinstance(a, CollectionDirectory) or isinstance(a, ProjectDirectory):
+ return a.uuid() == i['uuid']
elif isinstance(a, ObjectFile):
- return a.uuid == i['uuid'] and not a.stale()
+ return a.uuid() == i['uuid'] and not a.stale()
return False
- with llfuse.lock_released:
- if group_uuid_pattern.match(self.uuid):
- self.project_object = self.api.groups().get(
- uuid=self.uuid).execute(num_retries=self.num_retries)
- elif user_uuid_pattern.match(self.uuid):
- self.project_object = self.api.users().get(
- uuid=self.uuid).execute(num_retries=self.num_retries)
+ try:
+ with llfuse.lock_released:
+ self._updating_lock.acquire()
+ if not self.stale():
+ return
- contents = arvados.util.list_all(self.api.groups().contents,
- self.num_retries, uuid=self.uuid)
+ if group_uuid_pattern.match(self.project_uuid):
+ self.project_object = self.api.groups().get(
+ uuid=self.project_uuid).execute(num_retries=self.num_retries)
+ elif user_uuid_pattern.match(self.project_uuid):
+ self.project_object = self.api.users().get(
+ uuid=self.project_uuid).execute(num_retries=self.num_retries)
- # end with llfuse.lock_released, re-acquire lock
+ contents = arvados.util.list_all(self.api.groups().contents,
+ self.num_retries, uuid=self.project_uuid)
- self.merge(contents,
- namefn,
- samefn,
- self.createDirectory)
+ # end with llfuse.lock_released, re-acquire lock
+ self.merge(contents,
+ namefn,
+ samefn,
+ self.createDirectory)
+ finally:
+ self._updating_lock.release()
+
+ @use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
if item == '.arvados#project':
return self.project_object_file
else:
else:
return super(ProjectDirectory, self).__contains__(k)
+ @use_counter
+ @check_update
+ def writable(self):
+ with llfuse.lock_released:
+ if not self._current_user:
+ self._current_user = self.api.users().current().execute(num_retries=self.num_retries)
+ return self._current_user["uuid"] in self.project_object["writable_by"]
+
def persisted(self):
- return False
+ return True
+
+ @use_counter
+ @check_update
+ def mkdir(self, name):
+ try:
+ with llfuse.lock_released:
+ self.api.collections().create(body={"owner_uuid": self.project_uuid,
+ "name": name,
+ "manifest_text": ""}).execute(num_retries=self.num_retries)
+ self.invalidate()
+ except apiclient_errors.Error as error:
+ _logger.error(error)
+ raise llfuse.FUSEError(errno.EEXIST)
+
+ @use_counter
+ @check_update
+ def rmdir(self, name):
+ if name not in self:
+ raise llfuse.FUSEError(errno.ENOENT)
+ if not isinstance(self[name], CollectionDirectory):
+ raise llfuse.FUSEError(errno.EPERM)
+ if len(self[name]) > 0:
+ raise llfuse.FUSEError(errno.ENOTEMPTY)
+ with llfuse.lock_released:
+ self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries)
+ self.invalidate()
+
+ @use_counter
+ @check_update
+ def rename(self, name_old, name_new, src):
+ if not isinstance(src, ProjectDirectory):
+ raise llfuse.FUSEError(errno.EPERM)
+
+ ent = src[name_old]
+
+ if not isinstance(ent, CollectionDirectory):
+ raise llfuse.FUSEError(errno.EPERM)
+
+ if name_new in self:
+ # POSIX semantics for replacing one directory with another is
+ # tricky (the target directory must be empty, the operation must be
+ # atomic which isn't possible with the Arvados API as of this
+ # writing) so don't support that.
+ raise llfuse.FUSEError(errno.EPERM)
+
+ self.api.collections().update(uuid=ent.uuid(),
+ body={"owner_uuid": self.uuid(),
+ "name": name_new}).execute(num_retries=self.num_retries)
+
+ # Acually move the entry from source directory to this directory.
+ del src._entries[name_old]
+ self._entries[name_new] = ent
+ self.inodes.invalidate_entry(src.inode, name_old.encode(self.inodes.encoding))
class SharedDirectory(Directory):
self._poll = True
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
all_projects = arvados.util.list_all(
for r in root_owners:
if r in objects:
obr = objects[r]
- if "name" in obr:
+ if obr.get("name"):
contents[obr["name"]] = obr
- if "first_name" in obr:
+ #elif obr.get("username"):
+ # contents[obr["username"]] = obr
+ elif "first_name" in obr:
contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
+
for r in roots:
if r['owner_uuid'] not in objects:
contents[r['name']] = r
try:
self.merge(contents.items(),
lambda i: i[0],
- lambda a, i: a.uuid == i[1]['uuid'],
+ lambda a, i: a.uuid() == i[1]['uuid'],
lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
except Exception:
_logger.exception()