18941: Need to leave some space for current block
[arvados.git] / services / fuse / arvados_fuse / fusedir.py
index d5a018ae88fcd859adc3047ad2384732a0bfbe92..8dcde59ec336e2ba4cb872f7a7fbdc2faa755807 100644 (file)
@@ -36,7 +36,7 @@ class Directory(FreshBase):
     and the value referencing a File or Directory object.
     """
 
-    def __init__(self, parent_inode, inodes, apiconfig):
+    def __init__(self, parent_inode, inodes, apiconfig, enable_write):
         """parent_inode is the integer inode number"""
 
         super(Directory, self).__init__()
@@ -49,6 +49,7 @@ class Directory(FreshBase):
         self.apiconfig = apiconfig
         self._entries = {}
         self._mtime = time.time()
+        self._enable_write = enable_write
 
     def forward_slash_subst(self):
         if not hasattr(self, '_fsns'):
@@ -269,10 +270,12 @@ class CollectionDirectoryBase(Directory):
 
     """
 
-    def __init__(self, parent_inode, inodes, apiconfig, collection):
-        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig)
+    def __init__(self, parent_inode, inodes, apiconfig, enable_write, collection, collection_root):
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig, enable_write)
         self.apiconfig = apiconfig
         self.collection = collection
+        self.collection_root = collection_root
+        self.collection_record_file = None
 
     def new_entry(self, name, item, mtime):
         name = self.sanitize_filename(name)
@@ -284,13 +287,17 @@ class CollectionDirectoryBase(Directory):
             item.fuse_entry.dead = False
             self._entries[name] = item.fuse_entry
         elif isinstance(item, arvados.collection.RichCollectionBase):
-            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item))
+            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, self._enable_write, item, self.collection_root))
             self._entries[name].populate(mtime)
         else:
-            self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
+            self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime, self._enable_write))
         item.fuse_entry = self._entries[name]
 
     def on_event(self, event, collection, name, item):
+        # These are events from the Collection object (ADD/DEL/MOD)
+        # emitted by operations on the Collection object (like
+        # "mkdirs" or "remove"), and by "update", which we need to
+        # synchronize with our FUSE objects that are assigned inodes.
         if collection == self.collection:
             name = self.sanitize_filename(name)
 
@@ -335,6 +342,10 @@ class CollectionDirectoryBase(Directory):
                                 self.inodes.invalidate_inode(item.fuse_entry)
                             elif name in self._entries:
                                 self.inodes.invalidate_inode(self._entries[name])
+
+                        if self.collection_record_file is not None:
+                            self.collection_record_file.invalidate()
+                            self.inodes.invalidate_inode(self.collection_record_file)
             finally:
                 while lockcount > 0:
                     self.collection.lock.acquire()
@@ -348,28 +359,33 @@ class CollectionDirectoryBase(Directory):
                 self.new_entry(entry, item, self.mtime())
 
     def writable(self):
-        return self.collection.writable()
+        return self._enable_write and self.collection.writable()
 
     @use_counter
     def flush(self):
-        with llfuse.lock_released:
-            self.collection.root_collection().save()
+        self.collection_root.flush()
 
     @use_counter
     @check_update
     def create(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
         with llfuse.lock_released:
             self.collection.open(name, "w").close()
 
     @use_counter
     @check_update
     def mkdir(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
         with llfuse.lock_released:
             self.collection.mkdirs(name)
 
     @use_counter
     @check_update
     def unlink(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
         with llfuse.lock_released:
             self.collection.remove(name)
         self.flush()
@@ -377,6 +393,8 @@ class CollectionDirectoryBase(Directory):
     @use_counter
     @check_update
     def rmdir(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
         with llfuse.lock_released:
             self.collection.remove(name)
         self.flush()
@@ -384,6 +402,9 @@ class CollectionDirectoryBase(Directory):
     @use_counter
     @check_update
     def rename(self, name_old, name_new, src):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
+
         if not isinstance(src, CollectionDirectoryBase):
             raise llfuse.FUSEError(errno.EPERM)
 
@@ -413,12 +434,10 @@ class CollectionDirectoryBase(Directory):
 class CollectionDirectory(CollectionDirectoryBase):
     """Represents the root of a directory tree representing a collection."""
 
-    def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
-        super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None)
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, collection_record=None, explicit_collection=None):
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, None, self)
         self.api = api
         self.num_retries = num_retries
-        self.collection_record_file = None
-        self.collection_record = None
         self._poll = True
         try:
             self._poll_time = (api._rootDesc.get('blobSignatureTtl', 60*60*2) // 2)
@@ -434,79 +453,91 @@ class CollectionDirectory(CollectionDirectoryBase):
             self._mtime = 0
         self._manifest_size = 0
         if self.collection_locator:
-            self._writable = (uuid_pattern.match(self.collection_locator) is not None)
+            self._writable = (uuid_pattern.match(self.collection_locator) is not None) and enable_write
         self._updating_lock = threading.Lock()
 
     def same(self, i):
         return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
 
     def writable(self):
-        return self.collection.writable() if self.collection is not None else self._writable
+        return self._enable_write and (self.collection.writable() if self.collection is not None else self._writable)
+
+    @use_counter
+    def flush(self):
+        if not self.writable():
+            return
+        with llfuse.lock_released:
+            with self._updating_lock:
+                if self.collection.committed():
+                    self.collection.update()
+                else:
+                    self.collection.save()
+                self.new_collection_record(self.collection.api_response())
 
     def want_event_subscribe(self):
         return (uuid_pattern.match(self.collection_locator) is not None)
 
-    # Used by arv-web.py to switch the contents of the CollectionDirectory
-    def change_collection(self, new_locator):
-        """Switch the contents of the CollectionDirectory.
-
-        Must be called with llfuse.lock held.
-        """
-
-        self.collection_locator = new_locator
-        self.collection_record = None
-        self.update()
-
     def new_collection(self, new_collection_record, coll_reader):
         if self.inode:
             self.clear()
-
-        self.collection_record = new_collection_record
-
-        if self.collection_record:
-            self._mtime = convertTime(self.collection_record.get('modified_at'))
-            self.collection_locator = self.collection_record["uuid"]
-            if self.collection_record_file is not None:
-                self.collection_record_file.update(self.collection_record)
-
         self.collection = coll_reader
+        self.new_collection_record(new_collection_record)
         self.populate(self.mtime())
 
+    def new_collection_record(self, new_collection_record):
+        if not new_collection_record:
+            raise Exception("invalid new_collection_record")
+        self._mtime = convertTime(new_collection_record.get('modified_at'))
+        self._manifest_size = len(new_collection_record["manifest_text"])
+        self.collection_locator = new_collection_record["uuid"]
+        if self.collection_record_file is not None:
+            self.collection_record_file.invalidate()
+            self.inodes.invalidate_inode(self.collection_record_file)
+            _logger.debug("%s invalidated collection record file", self)
+        self.fresh()
+
     def uuid(self):
         return self.collection_locator
 
     @use_counter
-    def update(self, to_record_version=None):
+    def update(self):
         try:
-            if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
+            if self.collection is not None and portable_data_hash_pattern.match(self.collection_locator):
+                # It's immutable, nothing to update
                 return True
 
             if self.collection_locator is None:
+                # No collection locator to retrieve from
                 self.fresh()
                 return True
 
+            new_collection_record = None
             try:
                 with llfuse.lock_released:
                     self._updating_lock.acquire()
                     if not self.stale():
-                        return
+                        return True
 
-                    _logger.debug("Updating collection %s inode %s to record version %s", self.collection_locator, self.inode, to_record_version)
-                    new_collection_record = None
+                    _logger.debug("Updating collection %s inode %s", self.collection_locator, self.inode)
+                    coll_reader = None
                     if self.collection is not None:
-                        if self.collection.known_past_version(to_record_version):
-                            _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
-                        else:
-                            self.collection.update()
+                        # Already have a collection object
+                        self.collection.update()
+                        new_collection_record = self.collection.api_response()
                     else:
+                        get_threads = max(self.api.keep.block_cache.cache_max // (64 * 1024 * 1024)) - 1, 0)
+                        # Create a new collection object
                         if uuid_pattern.match(self.collection_locator):
                             coll_reader = arvados.collection.Collection(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
                         else:
                             coll_reader = arvados.collection.CollectionReader(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
+                            )
                         new_collection_record = coll_reader.api_response() or {}
                         # If the Collection only exists in Keep, there will be no API
                         # response.  Fill in the fields we need.
@@ -520,14 +551,13 @@ class CollectionDirectory(CollectionDirectoryBase):
                             new_collection_record['storage_classes_desired'] = coll_reader.storage_classes_desired()
 
                 # end with llfuse.lock_released, re-acquire lock
-                if (new_collection_record is not None and
-                    (self.collection_record is None or
-                     self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"))):
-                    self.new_collection(new_collection_record, coll_reader)
-                    self._manifest_size = len(coll_reader.manifest_text())
-                    _logger.debug("%s manifest_size %i", self, self._manifest_size)
 
-                self.fresh()
+                if new_collection_record is not None:
+                    if coll_reader is not None:
+                        self.new_collection(new_collection_record, coll_reader)
+                    else:
+                        self.new_collection_record(new_collection_record)
+
                 return True
             finally:
                 self._updating_lock.release()
@@ -535,22 +565,29 @@ class CollectionDirectory(CollectionDirectoryBase):
             _logger.error("Error fetching collection '%s': %s", self.collection_locator, e)
         except arvados.errors.ArgumentError as detail:
             _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
-            if self.collection_record is not None and "manifest_text" in self.collection_record:
-                _logger.warning("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
+            if new_collection_record is not None and "manifest_text" in new_collection_record:
+                _logger.warning("arv-mount manifest_text is: %s", new_collection_record["manifest_text"])
         except Exception:
             _logger.exception("arv-mount %s: error", self.collection_locator)
-            if self.collection_record is not None and "manifest_text" in self.collection_record:
-                _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
+            if new_collection_record is not None and "manifest_text" in new_collection_record:
+                _logger.error("arv-mount manifest_text is: %s", new_collection_record["manifest_text"])
         self.invalidate()
         return False
 
+    @use_counter
+    def collection_record(self):
+        self.flush()
+        return self.collection.api_response()
+
     @use_counter
     @check_update
     def __getitem__(self, item):
         if item == '.arvados#collection':
             if self.collection_record_file is None:
-                self.collection_record_file = ObjectFile(self.inode, self.collection_record)
+                self.collection_record_file = FuncToJSONFile(
+                    self.inode, self.collection_record)
                 self.inodes.add_entry(self.collection_record_file)
+            self.invalidate()  # use lookup as a signal to force update
             return self.collection_record_file
         else:
             return super(CollectionDirectory, self).__getitem__(item)
@@ -562,8 +599,9 @@ class CollectionDirectory(CollectionDirectoryBase):
             return super(CollectionDirectory, self).__contains__(k)
 
     def invalidate(self):
-        self.collection_record = None
-        self.collection_record_file = None
+        if self.collection_record_file is not None:
+            self.collection_record_file.invalidate()
+            self.inodes.invalidate_inode(self.collection_record_file)
         super(CollectionDirectory, self).invalidate()
 
     def persisted(self):
@@ -603,40 +641,42 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
         def save_new(self):
             pass
 
-    def __init__(self, parent_inode, inodes, api_client, num_retries, storage_classes=None):
+    def __init__(self, parent_inode, inodes, api_client, num_retries, enable_write, storage_classes=None):
         collection = self.UnsaveableCollection(
             api_client=api_client,
             keep_client=api_client.keep,
             num_retries=num_retries,
             storage_classes_desired=storage_classes)
+        # This is always enable_write=True because it never tries to
+        # save to the backend
         super(TmpCollectionDirectory, self).__init__(
-            parent_inode, inodes, api_client.config, collection)
-        self.collection_record_file = None
+            parent_inode, inodes, api_client.config, True, collection, self)
         self.populate(self.mtime())
 
     def on_event(self, *args, **kwargs):
         super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
-        if self.collection_record_file:
+        if self.collection_record_file is None:
+            return
 
-            # See discussion in CollectionDirectoryBase.on_event
-            lockcount = 0
-            try:
-                while True:
-                    self.collection.lock.release()
-                    lockcount += 1
-            except RuntimeError:
-                pass
+        # See discussion in CollectionDirectoryBase.on_event
+        lockcount = 0
+        try:
+            while True:
+                self.collection.lock.release()
+                lockcount += 1
+        except RuntimeError:
+            pass
 
-            try:
-                with llfuse.lock:
-                    with self.collection.lock:
-                        self.collection_record_file.invalidate()
-                        self.inodes.invalidate_inode(self.collection_record_file)
-                        _logger.debug("%s invalidated collection record", self)
-            finally:
-                while lockcount > 0:
-                    self.collection.lock.acquire()
-                    lockcount -= 1
+        try:
+            with llfuse.lock:
+                with self.collection.lock:
+                    self.collection_record_file.invalidate()
+                    self.inodes.invalidate_inode(self.collection_record_file)
+                    _logger.debug("%s invalidated collection record", self)
+        finally:
+            while lockcount > 0:
+                self.collection.lock.acquire()
+                lockcount -= 1
 
     def collection_record(self):
         with llfuse.lock_released:
@@ -667,6 +707,9 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def writable(self):
         return True
 
+    def flush(self):
+        pass
+
     def want_event_subscribe(self):
         return False
 
@@ -703,8 +746,8 @@ and the directory will appear if it exists.
 
 """.lstrip()
 
-    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False, storage_classes=None):
-        super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, pdh_only=False, storage_classes=None):
+        super(MagicDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
@@ -720,7 +763,8 @@ and the directory will appear if it exists.
             # If we're the root directory, add an identical by_id subdirectory.
             if self.inode == llfuse.ROOT_INODE:
                 self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
-                        self.inode, self.inodes, self.api, self.num_retries, self.pdh_only))
+                    self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
+                    self.pdh_only))
 
     def __contains__(self, k):
         if k in self._entries:
@@ -738,11 +782,11 @@ and the directory will appear if it exists.
                 if project[u'items_available'] == 0:
                     return False
                 e = self.inodes.add_entry(ProjectDirectory(
-                    self.inode, self.inodes, self.api, self.num_retries,
+                    self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
                     project[u'items'][0], storage_classes=self.storage_classes))
             else:
                 e = self.inodes.add_entry(CollectionDirectory(
-                        self.inode, self.inodes, self.api, self.num_retries, k))
+                        self.inode, self.inodes, self.api, self.num_retries, self._enable_write, k))
 
             if e.update():
                 if k not in self._entries:
@@ -776,8 +820,8 @@ and the directory will appear if it exists.
 class TagsDirectory(Directory):
     """A special directory that contains as subdirectories all tags visible to the user."""
 
-    def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
-        super(TagsDirectory, self).__init__(parent_inode, inodes, api.config)
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, poll_time=60):
+        super(TagsDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
         self.api = api
         self.num_retries = num_retries
         self._poll = True
@@ -798,7 +842,8 @@ class TagsDirectory(Directory):
             self.merge(tags['items']+[{"name": n} for n in self._extra],
                        lambda i: i['name'],
                        lambda a, i: a.tag == i['name'],
-                       lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time))
+                       lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
+                                              i['name'], poll=self._poll, poll_time=self._poll_time))
 
     @use_counter
     @check_update
@@ -832,9 +877,9 @@ class TagDirectory(Directory):
     to the user that are tagged with a particular tag.
     """
 
-    def __init__(self, parent_inode, inodes, api, num_retries, tag,
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, tag,
                  poll=False, poll_time=60):
-        super(TagDirectory, self).__init__(parent_inode, inodes, api.config)
+        super(TagDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
         self.api = api
         self.num_retries = num_retries
         self.tag = tag
@@ -856,15 +901,15 @@ class TagDirectory(Directory):
         self.merge(taggedcollections['items'],
                    lambda i: i['head_uuid'],
                    lambda a, i: a.collection_locator == i['head_uuid'],
-                   lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']))
+                   lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i['head_uuid']))
 
 
 class ProjectDirectory(Directory):
     """A special directory that contains the contents of a project."""
 
-    def __init__(self, parent_inode, inodes, api, num_retries, project_object,
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, project_object,
                  poll=True, poll_time=3, storage_classes=None):
-        super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
+        super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
         self.api = api
         self.num_retries = num_retries
         self.project_object = project_object
@@ -882,12 +927,13 @@ class ProjectDirectory(Directory):
 
     def createDirectory(self, i):
         if collection_uuid_pattern.match(i['uuid']):
-            return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
+            return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i)
         elif group_uuid_pattern.match(i['uuid']):
-            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time, self.storage_classes)
+            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
+                                    i, self._poll, self._poll_time, self.storage_classes)
         elif link_uuid_pattern.match(i['uuid']):
             if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
-                return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
+                return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write, i['head_uuid'])
             else:
                 return None
         elif uuid_pattern.match(i['uuid']):
@@ -954,11 +1000,13 @@ class ProjectDirectory(Directory):
                                                         uuid=self.project_uuid,
                                                         filters=[["uuid", "is_a", "arvados#group"],
                                                                  ["groups.group_class", "in", ["project","filter"]]]))
-                contents.extend(arvados.util.keyset_list_all(self.api.groups().contents,
+                contents.extend(filter(lambda i: i["current_version_uuid"] == i["uuid"],
+                                       arvados.util.keyset_list_all(self.api.groups().contents,
                                                              order_key="uuid",
                                                              num_retries=self.num_retries,
                                                              uuid=self.project_uuid,
-                                                             filters=[["uuid", "is_a", "arvados#collection"]]))
+                                                             filters=[["uuid", "is_a", "arvados#collection"]])))
+
 
             # end with llfuse.lock_released, re-acquire lock
 
@@ -1022,6 +1070,8 @@ class ProjectDirectory(Directory):
     @use_counter
     @check_update
     def writable(self):
+        if not self._enable_write:
+            return False
         with llfuse.lock_released:
             if not self._current_user:
                 self._current_user = self.api.users().current().execute(num_retries=self.num_retries)
@@ -1033,6 +1083,9 @@ class ProjectDirectory(Directory):
     @use_counter
     @check_update
     def mkdir(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
+
         try:
             with llfuse.lock_released:
                 c = {
@@ -1053,6 +1106,9 @@ class ProjectDirectory(Directory):
     @use_counter
     @check_update
     def rmdir(self, name):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
+
         if name not in self:
             raise llfuse.FUSEError(errno.ENOENT)
         if not isinstance(self[name], CollectionDirectory):
@@ -1066,6 +1122,9 @@ class ProjectDirectory(Directory):
     @use_counter
     @check_update
     def rename(self, name_old, name_new, src):
+        if not self.writable():
+            raise llfuse.FUSEError(errno.EROFS)
+
         if not isinstance(src, ProjectDirectory):
             raise llfuse.FUSEError(errno.EPERM)
 
@@ -1138,9 +1197,9 @@ class ProjectDirectory(Directory):
 class SharedDirectory(Directory):
     """A special directory that represents users or groups who have shared projects with me."""
 
-    def __init__(self, parent_inode, inodes, api, num_retries, exclude,
+    def __init__(self, parent_inode, inodes, api, num_retries, enable_write, exclude,
                  poll=False, poll_time=60, storage_classes=None):
-        super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
+        super(SharedDirectory, self).__init__(parent_inode, inodes, api.config, enable_write)
         self.api = api
         self.num_retries = num_retries
         self.current_user = api.users().current().execute(num_retries=num_retries)
@@ -1231,7 +1290,8 @@ class SharedDirectory(Directory):
             self.merge(contents.items(),
                        lambda i: i[0],
                        lambda a, i: a.uuid() == i[1]['uuid'],
-                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, self._enable_write,
+                                                  i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
         except Exception:
             _logger.exception("arv-mount shared dir error")
         finally: