From: Peter Amstutz Date: Mon, 18 May 2015 18:27:07 +0000 (-0400) Subject: 3198: Add docstring with overview of the architecture. Implement check_update X-Git-Tag: 1.1.0~1503^2~28 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/27616fe74103c079a84ac34b2adb83f1952c5772 3198: Add docstring with overview of the architecture. Implement check_update method wrapper and expand use_counter to every method containing a llfuse.lock_released block. Add note about big_writes option. --- diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py index 792c81f1e6..2d44d6ad6e 100644 --- a/sdk/python/arvados/arvfile.py +++ b/sdk/python/arvados/arvfile.py @@ -866,6 +866,7 @@ class ArvadosFile(object): for s in to_delete: self.parent._my_block_manager().delete_bufferblock(s) + self.parent.notify(MOD, self.parent, self.name, (self, self)) @must_be_writable @synchronized diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index fc810873fd..913db4ccfe 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -1,6 +1,49 @@ -# -# FUSE driver for Arvados Keep -# +"""FUSE driver for Arvados Keep + +Architecture: + +There is one `Operations` object per mount point. It is the entry point for all +read and write requests from the llfuse module. + +The operations object owns an `Inodes` object. The inodes object stores the +mapping from numeric inode (used throughout the file system API to uniquely +identify files) to the Python objects that implement files and directories. + +The `Inodes` object owns an `InodeCache` object. The inode cache records the +memory footprint of file system objects and when they are last used. When the +cache limit is exceeded, the least recently used objects are cleared. + +File system objects inherit from `fresh.FreshBase` which manages the object lifecycle. + +File objects inherit from `fusefile.File`. 
Key methods are `readfrom` and `writeto` +which implement actual reads and writes. + +Directory objects inherit from `fusedir.Directory`. The directory object wraps +a Python dict which stores the mapping from filenames to directory entries. +Directory contents can be accessed through the Python operators such as `[]` +and `in`. These methods automatically check if the directory is fresh (up to +date) or stale (needs update) and will call `update` if necessary before +returning a result. + +The general FUSE operation flow is as follows: + +- The request handler is called with either an inode or file handle that is the + subject of the operation. + +- Look up the inode using the Inodes table or the file handle in the + filehandles table to get the file system object. + +- For methods that alter files or directories, check that the operation is + valid and permitted using _check_writable(). + +- Call the relevant method on the file system object. + +- Return the result. + +The FUSE driver supports the Arvados event bus. When an event is received for +an object that is live in the inode cache, that object is immediately updated. + +""" import os import sys @@ -66,6 +109,17 @@ class DirectoryHandle(Handle): class InodeCache(object): + """Records the memory footprint of objects and when they are last used. + + When the cache limit is exceeded, the least recently used objects are + cleared. Clearing the object means discarding its contents to release + memory. The next time the object is accessed, it must be re-fetched from + the server. Note that the inode cache limit is a soft limit; the cache + limit may be exceeded if necessary to load very large objects, it may also + be exceeded if open file handles prevent objects from being cleared. 
+ + """ + def __init__(self, cap, min_entries=4): self._entries = collections.OrderedDict() self._by_uuid = {} @@ -172,7 +226,10 @@ class Inodes(object): entry.dead = True _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count) + def catch_exceptions(orig_func): + """Catch uncaught exceptions and log them consistently.""" + @functools.wraps(orig_func) def catch_exceptions_wrapper(self, *args, **kwargs): try: @@ -449,7 +506,7 @@ class Operations(llfuse.Operations): @catch_exceptions def statfs(self): st = llfuse.StatvfsData() - st.f_bsize = 64 * 1024 + st.f_bsize = 128 * 1024 st.f_blocks = 0 st.f_files = 0 diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py index aeb8f737c5..6ecf35c612 100644 --- a/services/fuse/arvados_fuse/fresh.py +++ b/services/fuse/arvados_fuse/fresh.py @@ -22,8 +22,39 @@ def use_counter(orig_func): self.dec_use() return use_counter_wrapper +def check_update(orig_func): + @functools.wraps(orig_func) + def check_update_wrapper(self, *args, **kwargs): + self.checkupdate() + return orig_func(self, *args, **kwargs) + return check_update_wrapper + class FreshBase(object): - """Base class for maintaining fresh/stale state to determine when to update.""" + """Base class for maintaining object lifecycle. + + Functions include: + + * Indicate if an object is up to date (stale() == False) or needs to be + updated (stale() == True). Use invalidate() to mark the object as + stale. An object is also automatically stale if it has not been updated + in `_poll_time` seconds. + + * Record access time (atime) timestamp + + * Manage internal use count used by the inode cache ("inc_use" and + "dec_use"). An object which is in use cannot be cleared by the inode + cache. + + * Manage the kernel reference count ("inc_ref" and "dec_ref"). An object + which is referenced by the kernel cannot have its inode entry deleted. 
+ + * Record cache footprint, cache priority + + * Record Arvados uuid at the time the object is placed in the cache + + * Clear the object contents (invalidates the object) + + """ def __init__(self): self._stale = True self._poll = False diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 17709df1d7..2fca36eb0a 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -10,7 +10,7 @@ from apiclient import errors as apiclient_errors import errno from fusefile import StringFile, ObjectFile, FuseArvadosFile -from fresh import FreshBase, convertTime, use_counter +from fresh import FreshBase, convertTime, use_counter, check_update import arvados.collection from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern @@ -45,9 +45,10 @@ class Directory(FreshBase): """ def __init__(self, parent_inode, inodes): + """parent_inode is the integer inode number""" + super(Directory, self).__init__() - """parent_inode is the integer inode number""" self.inode = None if not isinstance(parent_inode, int): raise Exception("parent_inode should be an int") @@ -78,23 +79,23 @@ class Directory(FreshBase): _logger.warn(e) @use_counter + @check_update def __getitem__(self, item): - self.checkupdate() return self._entries[item] @use_counter + @check_update def items(self): - self.checkupdate() return list(self._entries.items()) @use_counter + @check_update def __contains__(self, k): - self.checkupdate() return k in self._entries @use_counter + @check_update def __len__(self): - self.checkupdate() return len(self._entries) def fresh(self): @@ -196,7 +197,22 @@ class Directory(FreshBase): def rename(self, name_old, name_new, src): raise NotImplementedError() + class CollectionDirectoryBase(Directory): + """Represent an Arvados Collection as a directory. 
+ + This class is used for Subcollections, and is also the base class for + CollectionDirectory, which implements collection loading/saving on + Collection records. + + Most operations act only on the underlying Arvados `Collection` object. The + `Collection` object signals via a notify callback to + `CollectionDirectoryBase.on_event` that an item was added, removed or + modified. FUSE inodes and directory entries are created, deleted or + invalidated in response to these events. + + """ + def __init__(self, parent_inode, inodes, collection): super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) self.collection = collection @@ -243,28 +259,39 @@ class CollectionDirectoryBase(Directory): def writable(self): return self.collection.writable() + @use_counter def flush(self): with llfuse.lock_released: self.collection.root_collection().save() + @use_counter + @check_update def create(self, name): with llfuse.lock_released: self.collection.open(name, "w").close() + @use_counter + @check_update def mkdir(self, name): with llfuse.lock_released: self.collection.mkdirs(name) + @use_counter + @check_update def unlink(self, name): with llfuse.lock_released: self.collection.remove(name) self.flush() + @use_counter + @check_update def rmdir(self, name): with llfuse.lock_released: self.collection.remove(name) self.flush() + @use_counter + @check_update def rename(self, name_old, name_new, src): if not isinstance(src, CollectionDirectoryBase): raise llfuse.FUSEError(errno.EPERM) @@ -289,7 +316,7 @@ class CollectionDirectoryBase(Directory): class CollectionDirectory(CollectionDirectoryBase): - """Represents the root of a directory tree holding a collection.""" + """Represents the root of a directory tree representing a collection.""" def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): super(CollectionDirectory, self).__init__(parent_inode, inodes, None) @@ -343,6 +370,7 @@ class 
CollectionDirectory(CollectionDirectoryBase): def uuid(self): return self.collection_locator + @use_counter def update(self): try: if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator): @@ -403,8 +431,9 @@ class CollectionDirectory(CollectionDirectoryBase): _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"]) return False + @use_counter + @check_update def __getitem__(self, item): - self.checkupdate() if item == '.arvados#collection': if self.collection_record_file is None: self.collection_record_file = ObjectFile(self.inode, self.collection_record) @@ -433,6 +462,7 @@ class CollectionDirectory(CollectionDirectoryBase): # footprint directly would be more accurate, but also more complicated. return self._manifest_size * 128 + class MagicDirectory(Directory): """A special directory that logically contains the set of all extant keep locators. @@ -522,6 +552,7 @@ class TagsDirectory(RecursiveInvalidateDirectory): self._poll = True self._poll_time = poll_time + @use_counter def update(self): with llfuse.lock_released: tags = self.api.links().list( @@ -549,6 +580,7 @@ class TagDirectory(Directory): self._poll = poll self._poll_time = poll_time + @use_counter def update(self): with llfuse.lock_released: taggedcollections = self.api.links().list( @@ -597,6 +629,7 @@ class ProjectDirectory(Directory): def uuid(self): return self.project_uuid + @use_counter def update(self): if self.project_object_file == None: self.project_object_file = ObjectFile(self.inode, self.project_object) @@ -650,8 +683,9 @@ class ProjectDirectory(Directory): finally: self._updating_lock.release() + @use_counter + @check_update def __getitem__(self, item): - self.checkupdate() if item == '.arvados#project': return self.project_object_file else: @@ -663,6 +697,8 @@ class ProjectDirectory(Directory): else: return super(ProjectDirectory, self).__contains__(k) + @use_counter + @check_update def writable(self): with 
llfuse.lock_released: if not self._current_user: @@ -672,6 +708,8 @@ class ProjectDirectory(Directory): def persisted(self): return True + @use_counter + @check_update def mkdir(self, name): try: with llfuse.lock_released: @@ -683,6 +721,8 @@ class ProjectDirectory(Directory): _logger.error(error) raise llfuse.FUSEError(errno.EEXIST) + @use_counter + @check_update def rmdir(self, name): if name not in self: raise llfuse.FUSEError(errno.ENOENT) @@ -694,6 +734,8 @@ class ProjectDirectory(Directory): self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries) self.invalidate() + @use_counter + @check_update def rename(self, name_old, name_new, src): if not isinstance(src, ProjectDirectory): raise llfuse.FUSEError(errno.EPERM) @@ -719,6 +761,7 @@ class ProjectDirectory(Directory): self._entries[name_new] = ent llfuse.invalidate_entry(src.inode, name_old) + class SharedDirectory(Directory): """A special directory that represents users or groups who have shared projects with me.""" @@ -731,6 +774,7 @@ class SharedDirectory(Directory): self._poll = True self._poll_time = poll_time + @use_counter def update(self): with llfuse.lock_released: all_projects = arvados.util.list_all( diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py index d09b1f007c..d33f9f9e41 100644 --- a/services/fuse/arvados_fuse/fusefile.py +++ b/services/fuse/arvados_fuse/fusefile.py @@ -37,6 +37,7 @@ class File(FreshBase): def flush(self): pass + class FuseArvadosFile(File): """Wraps a ArvadosFile.""" diff --git a/services/fuse/bin/arv-mount b/services/fuse/bin/arv-mount index 49874da619..76476da1cb 100755 --- a/services/fuse/bin/arv-mount +++ b/services/fuse/bin/arv-mount @@ -156,6 +156,7 @@ From here, the following directories are available: opts = [optname for optname in ['allow_other', 'debug'] if getattr(args, optname)] + # Increase default read/write size from 4KiB to 128KiB opts += ["big_writes", "max_read=131072"] if 
args.exec_args: