3198: Add enable_write flag to FUSE and --enable-write and --read-only to
[arvados.git] / services / fuse / arvados_fuse / fusedir.py
index c1cfaa411e00452976a7c3aace663c0dc27817b5..f661e41483f5fb3a3bc952c44796c0d8c317b796 100644 (file)
@@ -5,9 +5,12 @@ import llfuse
 import arvados
 import apiclient
 import functools
+import threading
+from apiclient import errors as apiclient_errors
+import errno
 
 from fusefile import StringFile, ObjectFile, FuseArvadosFile
-from fresh import FreshBase, convertTime, use_counter
+from fresh import FreshBase, convertTime, use_counter, check_update
 
 import arvados.collection
 from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
@@ -42,9 +45,10 @@ class Directory(FreshBase):
     """
 
     def __init__(self, parent_inode, inodes):
+        """parent_inode is the integer inode number"""
+
         super(Directory, self).__init__()
 
-        """parent_inode is the integer inode number"""
         self.inode = None
         if not isinstance(parent_inode, int):
             raise Exception("parent_inode should be an int")
@@ -72,23 +76,28 @@ class Directory(FreshBase):
             try:
                 self.update()
             except apiclient.errors.HttpError as e:
-                _logger.debug(e)
+                _logger.warn(e)
 
     @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         return self._entries[item]
 
     @use_counter
+    @check_update
     def items(self):
-        self.checkupdate()
         return list(self._entries.items())
 
     @use_counter
+    @check_update
     def __contains__(self, k):
-        self.checkupdate()
         return k in self._entries
 
+    @use_counter
+    @check_update
+    def __len__(self):
+        """Return the number of entries currently held by this directory."""
+        return len(self._entries)
+
     def fresh(self):
         self.inodes.touch(self)
         super(Directory, self).fresh()
@@ -125,6 +134,7 @@ class Directory(FreshBase):
                     self._entries[name] = oldentries[name]
                     del oldentries[name]
                 else:
+                    _logger.debug("Adding entry '%s' to inode %i", name, self.inode)
                     # create new directory entry
                     ent = new_entry(i)
                     if ent is not None:
@@ -133,11 +143,13 @@ class Directory(FreshBase):
 
         # delete any other directory entries that were not in found in 'items'
         for i in oldentries:
-            llfuse.invalidate_entry(self.inode, str(i))
+            _logger.debug("Forgetting about entry '%s' on inode %i", i, self.inode)
+            llfuse.invalidate_entry(self.inode, i.encode(self.inodes.encoding))
             self.inodes.del_entry(oldentries[i])
             changed = True
 
         if changed:
+            llfuse.invalidate_inode(self.inode)
             self._mtime = time.time()
 
         self.fresh()
@@ -153,7 +165,7 @@ class Directory(FreshBase):
                     self._entries = oldentries
                     return False
             for n in oldentries:
-                llfuse.invalidate_entry(self.inode, str(n))
+                llfuse.invalidate_entry(self.inode, n.encode(self.inodes.encoding))
                 self.inodes.del_entry(oldentries[n])
             llfuse.invalidate_inode(self.inode)
             self.invalidate()
@@ -164,37 +176,171 @@ class Directory(FreshBase):
     def mtime(self):
         return self._mtime
 
+    def writable(self):
+        """Report whether this directory can be modified; the base class never can."""
+        return False
+
+    def flush(self):
+        """Do nothing; subclasses with state to save override this."""
+        pass
+
+    def create(self, name):
+        """Not supported by the base class; always raises NotImplementedError."""
+        raise NotImplementedError()
+
+    def mkdir(self, name):
+        """Not supported by the base class; always raises NotImplementedError."""
+        raise NotImplementedError()
+
+    def unlink(self, name):
+        """Not supported by the base class; always raises NotImplementedError."""
+        raise NotImplementedError()
 
-class CollectionDirectory(Directory):
-    """Represents the root of a directory tree holding a collection."""
+    def rmdir(self, name):
+        """Not supported by the base class; always raises NotImplementedError."""
+        raise NotImplementedError()
 
-    def __init__(self, parent_inode, inodes, api, num_retries, collection):
-        super(CollectionDirectory, self).__init__(parent_inode, inodes)
+    def rename(self, name_old, name_new, src):
+        """Not supported by the base class; always raises NotImplementedError."""
+        raise NotImplementedError()
+
+
+class CollectionDirectoryBase(Directory):
+    """Represent an Arvados Collection as a directory.
+
+    This class is used for Subcollections, and is also the base class for
+    CollectionDirectory, which implements collection loading/saving on
+    Collection records.
+
+    Most operations act only on the underlying Arvados `Collection` object.  The
+    `Collection` object signals via a notify callback to
+    `CollectionDirectoryBase.on_event` that an item was added, removed or
+    modified.  FUSE inodes and directory entries are created, deleted or
+    invalidated in response to these events.
+
+    """
+
+    def __init__(self, parent_inode, inodes, collection):
+        """Remember `collection` as the object backing this directory.
+
+        parent_inode and inodes are handed unchanged to Directory.__init__.
+        """
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+        self.collection = collection
+
+    def new_entry(self, name, item, mtime):
+        """Register the directory entry for `item` under the sanitized `name`.
+
+        An item that already carries a `fuse_entry` is re-attached: that entry
+        must be marked dead and still have an inode, otherwise an Exception is
+        raised.  Any other item gets a new CollectionDirectoryBase (for a
+        RichCollectionBase, which is then populated with `mtime`) or a new
+        FuseArvadosFile.  In every case the entry is recorded back on
+        `item.fuse_entry`.
+        """
+        name = sanitize_filename(name)
+        if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+            # Reuse the existing entry object instead of allocating a new one.
+            if item.fuse_entry.dead is not True:
+                raise Exception("Can only reparent dead inode entry")
+            if item.fuse_entry.inode is None:
+                raise Exception("Reparented entry must still have valid inode")
+            item.fuse_entry.dead = False
+            self._entries[name] = item.fuse_entry
+        elif isinstance(item, arvados.collection.RichCollectionBase):
+            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+            self._entries[name].populate(mtime)
+        else:
+            self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
+        item.fuse_entry = self._entries[name]
+
+    def on_event(self, event, collection, name, item):
+        """Handle a change notification from a collection.
+
+        Events whose `collection` is not this directory's collection are
+        ignored.  Otherwise, while holding llfuse.lock: ADD creates the entry,
+        DEL removes it and invalidates the directory entry, and MOD invalidates
+        the inode of the affected entry.
+        """
+        if collection == self.collection:
+            name = sanitize_filename(name)
+            _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
+            with llfuse.lock:
+                if event == arvados.collection.ADD:
+                    self.new_entry(name, item, self.mtime())
+                elif event == arvados.collection.DEL:
+                    ent = self._entries[name]
+                    del self._entries[name]
+                    llfuse.invalidate_entry(self.inode, name.encode(self.inodes.encoding))
+                    self.inodes.del_entry(ent)
+                elif event == arvados.collection.MOD:
+                    # Prefer the entry recorded on the item; fall back to a
+                    # lookup by name in this directory.
+                    if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+                        llfuse.invalidate_inode(item.fuse_entry.inode)
+                    elif name in self._entries:
+                        llfuse.invalidate_inode(self._entries[name].inode)
+
+    def populate(self, mtime):
+        """Set the directory mtime, subscribe to collection events and create
+        an entry for every item currently in the collection."""
+        self._mtime = mtime
+        self.collection.subscribe(self.on_event)
+        for entry, item in self.collection.items():
+            self.new_entry(entry, item, self.mtime())
+
+    def writable(self):
+        """Return whatever the underlying collection reports for writable()."""
+        return self.collection.writable()
+
+    @use_counter
+    def flush(self):
+        """Save the root collection, with the llfuse lock released meanwhile."""
+        with llfuse.lock_released:
+            self.collection.root_collection().save()
+
+    @use_counter
+    @check_update
+    def create(self, name):
+        """Create `name` in the collection by opening it for writing and
+        closing it immediately."""
+        with llfuse.lock_released:
+            self.collection.open(name, "w").close()
+
+    @use_counter
+    @check_update
+    def mkdir(self, name):
+        """Create the subcollection `name` via the collection's mkdirs()."""
+        with llfuse.lock_released:
+            self.collection.mkdirs(name)
+
+    @use_counter
+    @check_update
+    def unlink(self, name):
+        """Remove `name` from the collection, then flush()."""
+        with llfuse.lock_released:
+            self.collection.remove(name)
+        self.flush()
+
+    @use_counter
+    @check_update
+    def rmdir(self, name):
+        """Remove `name` from the collection, then flush()."""
+        with llfuse.lock_released:
+            self.collection.remove(name)
+        self.flush()
+
+    @use_counter
+    @check_update
+    def rename(self, name_old, name_new, src):
+        """Move `name_old` from directory `src` to `name_new` in this directory.
+
+        Raises llfuse.FUSEError with:
+          EPERM     if `src` is not a CollectionDirectoryBase
+          ENOTEMPTY if a directory would replace a non-empty directory
+          ENOTDIR   if a directory would replace a file
+          EISDIR    if a file would replace a directory
+
+        After the collection-level rename both this directory and `src` are
+        flushed.
+        """
+        if not isinstance(src, CollectionDirectoryBase):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        if name_new in self:
+            # The target exists: check that replacing it is allowed.
+            ent = src[name_old]
+            tgt = self[name_new]
+            if isinstance(ent, FuseArvadosFile) and isinstance(tgt, FuseArvadosFile):
+                # A file may replace a file.
+                pass
+            elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, CollectionDirectoryBase):
+                if len(tgt) > 0:
+                    raise llfuse.FUSEError(errno.ENOTEMPTY)
+            elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, FuseArvadosFile):
+                raise llfuse.FUSEError(errno.ENOTDIR)
+            elif isinstance(ent, FuseArvadosFile) and isinstance(tgt, CollectionDirectoryBase):
+                raise llfuse.FUSEError(errno.EISDIR)
+
+        with llfuse.lock_released:
+            self.collection.rename(name_old, name_new, source_collection=src.collection, overwrite=True)
+        self.flush()
+        src.flush()
+
+
+class CollectionDirectory(CollectionDirectoryBase):
+    """Represents the root of a directory tree representing a collection."""
+
+    def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
         self.api = api
         self.num_retries = num_retries
-        self.collection_object_file = None
-        self.collection_object = None
-        if isinstance(collection, dict):
-            self.collection_locator = collection['uuid']
-            self._mtime = convertTime(collection.get('modified_at'))
+        self.collection_record_file = None
+        self.collection_record = None
+        if isinstance(collection_record, dict):
+            self.collection_locator = collection_record['uuid']
+            self._mtime = convertTime(collection_record.get('modified_at'))
         else:
-            self.collection_locator = collection
+            self.collection_locator = collection_record
             self._mtime = 0
         self._manifest_size = 0
+        if self.collection_locator:
+            self._writable = (uuid_pattern.match(self.collection_locator) is not None)
+        self._updating_lock = threading.Lock()
 
     def same(self, i):
         return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
 
-    @staticmethod
-    def populate(inodes, cwd, collection, mtime):
-        for entry, item in collection.items():
-            entry = sanitize_filename(entry)
-            if isinstance(item, arvados.collection.RichCollectionBase):
-                cwd._entries[entry] = inodes.add_entry(Directory(cwd.inode, inodes))
-                cwd._mtime = mtime
-                CollectionDirectory.populate(inodes, cwd._entries[entry], item, mtime)
-            else:
-                cwd._entries[entry] = inodes.add_entry(FuseArvadosFile(cwd.inode, item, mtime))
+    def writable(self):
+        """Report whether this collection directory accepts modifications.
+
+        Once a collection object is loaded, its own writable() answer is used.
+        Before that, fall back to the flag __init__ derives from the locator.
+        __init__ only sets that flag when a locator was given, so default to
+        read-only here rather than raising AttributeError.
+        """
+        if self.collection is not None:
+            return self.collection.writable()
+        return getattr(self, "_writable", False)
 
     # Used by arv-web.py to switch the contents of the CollectionDirectory
     def change_collection(self, new_locator):
@@ -204,73 +350,99 @@ class CollectionDirectory(Directory):
         """
 
         self.collection_locator = new_locator
-        self.collection_object = None
+        self.collection_record = None
         self.update()
 
-    def new_collection(self, new_collection_object, coll_reader):
-        self.clear(force=True)
+    def new_collection(self, new_collection_record, coll_reader):
+        if self.inode:
+            self.clear(force=True)
 
-        self.collection_object = new_collection_object
+        self.collection_record = new_collection_record
 
-        self._mtime = convertTime(self.collection_object.get('modified_at'))
+        if self.collection_record:
+            self._mtime = convertTime(self.collection_record.get('modified_at'))
+            self.collection_locator = self.collection_record["uuid"]
+            if self.collection_record_file is not None:
+                self.collection_record_file.update(self.collection_record)
 
-        if self.collection_object_file is not None:
-            self.collection_object_file.update(self.collection_object)
+        self.collection = coll_reader
+        self.populate(self.mtime())
 
-        CollectionDirectory.populate(self.inodes, self, coll_reader, self.mtime())
+    def uuid(self):
+        """Return the collection locator currently associated with this
+        directory (it may be a portable data hash rather than a UUID)."""
+        return self.collection_locator
 
-    def update(self):
+    @use_counter
+    def update(self, to_record_version=None):
         try:
-            if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator):
+            if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
                 return True
 
             if self.collection_locator is None:
                 self.fresh()
                 return True
 
-            with llfuse.lock_released:
-                coll_reader = arvados.CollectionReader(
-                    self.collection_locator, self.api, self.api.keep,
-                    num_retries=self.num_retries)
-                new_collection_object = coll_reader.api_response() or {}
-                # If the Collection only exists in Keep, there will be no API
-                # response.  Fill in the fields we need.
-                if 'uuid' not in new_collection_object:
-                    new_collection_object['uuid'] = self.collection_locator
-                if "portable_data_hash" not in new_collection_object:
-                    new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
-                if 'manifest_text' not in new_collection_object:
-                    new_collection_object['manifest_text'] = coll_reader.manifest_text()
-                coll_reader.normalize()
-            # end with llfuse.lock_released, re-acquire lock
-
-            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
-                self.new_collection(new_collection_object, coll_reader)
-
-            self._manifest_size = len(coll_reader.manifest_text())
-            _logger.debug("%s manifest_size %i", self, self._manifest_size)
+            try:
+                with llfuse.lock_released:
+                    self._updating_lock.acquire()
+                    if not self.stale():
+                        return
+
+                    _logger.debug("Updating %s", self.collection_locator)
+                    if self.collection is not None:
+                        if self.collection.known_past_version(to_record_version):
+                            _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
+                        else:
+                            self.collection.update()
+                    else:
+                        if uuid_pattern.match(self.collection_locator):
+                            coll_reader = arvados.collection.Collection(
+                                self.collection_locator, self.api, self.api.keep,
+                                num_retries=self.num_retries)
+                        else:
+                            coll_reader = arvados.collection.CollectionReader(
+                                self.collection_locator, self.api, self.api.keep,
+                                num_retries=self.num_retries)
+                        new_collection_record = coll_reader.api_response() or {}
+                        # If the Collection only exists in Keep, there will be no API
+                        # response.  Fill in the fields we need.
+                        if 'uuid' not in new_collection_record:
+                            new_collection_record['uuid'] = self.collection_locator
+                        if "portable_data_hash" not in new_collection_record:
+                            new_collection_record["portable_data_hash"] = new_collection_record["uuid"]
+                        if 'manifest_text' not in new_collection_record:
+                            new_collection_record['manifest_text'] = coll_reader.manifest_text()
+
+                        if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
+                            self.new_collection(new_collection_record, coll_reader)
+
+                        self._manifest_size = len(coll_reader.manifest_text())
+                        _logger.debug("%s manifest_size %i", self, self._manifest_size)
+                # end with llfuse.lock_released, re-acquire lock
 
-            self.fresh()
-            return True
+                self.fresh()
+                return True
+            finally:
+                self._updating_lock.release()
         except arvados.errors.NotFoundError:
             _logger.exception("arv-mount %s: error", self.collection_locator)
         except arvados.errors.ArgumentError as detail:
             _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
-            if self.collection_object is not None and "manifest_text" in self.collection_object:
-                _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
+            if self.collection_record is not None and "manifest_text" in self.collection_record:
+                _logger.warning("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
         except Exception:
             _logger.exception("arv-mount %s: error", self.collection_locator)
-            if self.collection_object is not None and "manifest_text" in self.collection_object:
-                _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
+            if self.collection_record is not None and "manifest_text" in self.collection_record:
+                _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
         return False
 
+    @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         if item == '.arvados#collection':
-            if self.collection_object_file is None:
-                self.collection_object_file = ObjectFile(self.inode, self.collection_object)
-                self.inodes.add_entry(self.collection_object_file)
-            return self.collection_object_file
+            if self.collection_record_file is None:
+                self.collection_record_file = ObjectFile(self.inode, self.collection_record)
+                self.inodes.add_entry(self.collection_record_file)
+            return self.collection_record_file
         else:
             return super(CollectionDirectory, self).__getitem__(item)
 
@@ -281,16 +453,26 @@ class CollectionDirectory(Directory):
             return super(CollectionDirectory, self).__contains__(k)
 
     def invalidate(self):
-        self.collection_object = None
-        self.collection_object_file = None
+        self.collection_record = None
+        self.collection_record_file = None
         super(CollectionDirectory, self).invalidate()
 
     def persisted(self):
         return (self.collection_locator is not None)
 
     def objsize(self):
+        # This is an empirically-derived heuristic to estimate the memory used
+        # to store this collection's metadata.  Calculating the memory
+        # footprint directly would be more accurate, but also more complicated.
         return self._manifest_size * 128
 
+    def finalize(self):
+        """Save the collection if it is writable, then stop its threads.
+
+        Does nothing when no collection object has been loaded.
+        """
+        if self.collection is not None:
+            if self.writable():
+                self.collection.save()
+            self.collection.stop_threads()
+
+
 class MagicDirectory(Directory):
     """A special directory that logically contains the set of all extant keep locators.
 
@@ -380,6 +562,7 @@ class TagsDirectory(RecursiveInvalidateDirectory):
         self._poll = True
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             tags = self.api.links().list(
@@ -407,6 +590,7 @@ class TagDirectory(Directory):
         self._poll = poll
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             taggedcollections = self.api.links().list(
@@ -431,9 +615,11 @@ class ProjectDirectory(Directory):
         self.num_retries = num_retries
         self.project_object = project_object
         self.project_object_file = None
-        self.uuid = project_object['uuid']
+        self.project_uuid = project_object['uuid']
         self._poll = poll
         self._poll_time = poll_time
+        self._updating_lock = threading.Lock()
+        self._current_user = None
 
     def createDirectory(self, i):
         if collection_uuid_pattern.match(i['uuid']):
@@ -450,6 +636,10 @@ class ProjectDirectory(Directory):
         else:
             return None
 
+    def uuid(self):
+        """Return the uuid taken from the project record given to __init__."""
+        return self.project_uuid
+
+    @use_counter
     def update(self):
         if self.project_object_file == None:
             self.project_object_file = ObjectFile(self.inode, self.project_object)
@@ -472,34 +662,40 @@ class ProjectDirectory(Directory):
                 return None
 
         def samefn(a, i):
-            if isinstance(a, CollectionDirectory):
-                return a.collection_locator == i['uuid']
-            elif isinstance(a, ProjectDirectory):
-                return a.uuid == i['uuid']
+            if isinstance(a, CollectionDirectory) or isinstance(a, ProjectDirectory):
+                return a.uuid() == i['uuid']
             elif isinstance(a, ObjectFile):
-                return a.uuid == i['uuid'] and not a.stale()
+                return a.uuid() == i['uuid'] and not a.stale()
             return False
 
-        with llfuse.lock_released:
-            if group_uuid_pattern.match(self.uuid):
-                self.project_object = self.api.groups().get(
-                    uuid=self.uuid).execute(num_retries=self.num_retries)
-            elif user_uuid_pattern.match(self.uuid):
-                self.project_object = self.api.users().get(
-                    uuid=self.uuid).execute(num_retries=self.num_retries)
+        try:
+            with llfuse.lock_released:
+                self._updating_lock.acquire()
+                if not self.stale():
+                    return
 
-            contents = arvados.util.list_all(self.api.groups().contents,
-                                             self.num_retries, uuid=self.uuid)
+                if group_uuid_pattern.match(self.project_uuid):
+                    self.project_object = self.api.groups().get(
+                        uuid=self.project_uuid).execute(num_retries=self.num_retries)
+                elif user_uuid_pattern.match(self.project_uuid):
+                    self.project_object = self.api.users().get(
+                        uuid=self.project_uuid).execute(num_retries=self.num_retries)
 
-        # end with llfuse.lock_released, re-acquire lock
+                contents = arvados.util.list_all(self.api.groups().contents,
+                                                 self.num_retries, uuid=self.project_uuid)
 
-        self.merge(contents,
-                   namefn,
-                   samefn,
-                   self.createDirectory)
+            # end with llfuse.lock_released, re-acquire lock
 
+            self.merge(contents,
+                       namefn,
+                       samefn,
+                       self.createDirectory)
+        finally:
+            self._updating_lock.release()
+
+    @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         if item == '.arvados#project':
             return self.project_object_file
         else:
@@ -511,11 +707,70 @@ class ProjectDirectory(Directory):
         else:
             return super(ProjectDirectory, self).__contains__(k)
 
+    @use_counter
+    @check_update
+    def writable(self):
+        """Report whether the current API user may modify this project.
+
+        The current user record is fetched once and cached on the instance.
+        The answer is True when that user's uuid is listed in the project
+        record's "writable_by" field; a record without that field is treated
+        as read-only instead of raising KeyError.
+        """
+        # The users().current() lookup is an API request, so it runs with the
+        # llfuse lock released.
+        with llfuse.lock_released:
+            if not self._current_user:
+                self._current_user = self.api.users().current().execute(num_retries=self.num_retries)
+            return self._current_user["uuid"] in self.project_object.get("writable_by", [])
+
     def persisted(self):
-        return False
+        return True
+
+    @use_counter
+    @check_update
+    def mkdir(self, name):
+        """Create an empty collection called `name` owned by this project.
+
+        On success this directory is invalidated (presumably so the listing is
+        re-read on next access -- confirm against FreshBase.invalidate).
+
+        Raises llfuse.FUSEError(EEXIST) when the API request fails.
+        """
+        try:
+            with llfuse.lock_released:
+                self.api.collections().create(body={"owner_uuid": self.project_uuid,
+                                                    "name": name,
+                                                    "manifest_text": ""}).execute(num_retries=self.num_retries)
+            self.invalidate()
+        except apiclient_errors.Error as error:
+            # NOTE(review): every apiclient error is reported as EEXIST,
+            # whatever its actual cause -- confirm this is intended.
+            _logger.error(error)
+            raise llfuse.FUSEError(errno.EEXIST)
+
+    @use_counter
+    @check_update
+    def rmdir(self, name):
+        """Delete the empty collection shown as `name` in this project.
+
+        Raises llfuse.FUSEError with:
+          ENOENT    if `name` is not in this directory
+          EPERM     if the entry is not a CollectionDirectory
+          ENOTEMPTY if the collection directory still has entries
+
+        On success this directory is invalidated.
+        """
+        if name not in self:
+            raise llfuse.FUSEError(errno.ENOENT)
+        if not isinstance(self[name], CollectionDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+        if len(self[name]) > 0:
+            raise llfuse.FUSEError(errno.ENOTEMPTY)
+        with llfuse.lock_released:
+            self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries)
+        self.invalidate()
+
+    @use_counter
+    @check_update
+    def rename(self, name_old, name_new, src):
+        """Move collection `name_old` from project `src` into this project
+        under the name `name_new`.
+
+        The collection record is updated through the API (new owner_uuid and
+        name), then the entry is moved between the two directories' entry
+        tables and the old directory entry is invalidated.
+
+        Raises llfuse.FUSEError(EPERM) if `src` is not a ProjectDirectory, if
+        the entry being moved is not a CollectionDirectory, or if `name_new`
+        already exists here.
+        """
+        if not isinstance(src, ProjectDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        ent = src[name_old]
+
+        if not isinstance(ent, CollectionDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        if name_new in self:
+            # POSIX semantics for replacing one directory with another is
+            # tricky (the target directory must be empty, the operation must be
+            # atomic which isn't possible with the Arvados API as of this
+            # writing) so don't support that.
+            raise llfuse.FUSEError(errno.EPERM)
+
+        # NOTE(review): unlike the other API requests in this class, this one
+        # is not wrapped in llfuse.lock_released -- confirm that is intended.
+        self.api.collections().update(uuid=ent.uuid(),
+                                      body={"owner_uuid": self.uuid(),
+                                            "name": name_new}).execute(num_retries=self.num_retries)
+
+        # Actually move the entry from source directory to this directory.
+        del src._entries[name_old]
+        self._entries[name_new] = ent
+        llfuse.invalidate_entry(src.inode, name_old.encode(self.inodes.encoding))
 
-    def objsize(self):
-        return len(self.project_object) * 1024 if self.project_object else 0
 
 class SharedDirectory(Directory):
     """A special directory that represents users or groups who have shared projects with me."""
@@ -529,6 +784,7 @@ class SharedDirectory(Directory):
         self._poll = True
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             all_projects = arvados.util.list_all(
@@ -564,11 +820,14 @@ class SharedDirectory(Directory):
             for r in root_owners:
                 if r in objects:
                     obr = objects[r]
-                    if "name" in obr:
+                    if obr.get("name"):
                         contents[obr["name"]] = obr
-                    if "first_name" in obr:
+                    #elif obr.get("username"):
+                    #    contents[obr["username"]] = obr
+                    elif "first_name" in obr:
                         contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
 
+
             for r in roots:
                 if r['owner_uuid'] not in objects:
                     contents[r['name']] = r
@@ -578,7 +837,7 @@ class SharedDirectory(Directory):
         try:
             self.merge(contents.items(),
                        lambda i: i[0],
-                       lambda a, i: a.uuid == i[1]['uuid'],
+                       lambda a, i: a.uuid() == i[1]['uuid'],
                        lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
         except Exception:
             _logger.exception()