Merge branch '17995-filter-by-comparing-attrs'
[arvados.git] / services / fuse / arvados_fuse / fusedir.py
index 1fab2e0fb89d22d4362dd56d017e55519ead6f09..d5a018ae88fcd859adc3047ad2384732a0bfbe92 100644 (file)
@@ -2,11 +2,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import absolute_import
-from __future__ import division
-from future.utils import viewitems
-from future.utils import itervalues
-from builtins import dict
 import apiclient
 import arvados
 import errno
@@ -196,7 +191,7 @@ class Directory(FreshBase):
     def in_use(self):
         if super(Directory, self).in_use():
             return True
-        for v in itervalues(self._entries):
+        for v in self._entries.values():
             if v.in_use():
                 return True
         return False
@@ -204,7 +199,7 @@ class Directory(FreshBase):
     def has_ref(self, only_children):
         if super(Directory, self).has_ref(only_children):
             return True
-        for v in itervalues(self._entries):
+        for v in self._entries.values():
             if v.has_ref(False):
                 return True
         return False
@@ -226,7 +221,7 @@ class Directory(FreshBase):
         # Find self on the parent in order to invalidate this path.
         # Calling the public items() method might trigger a refresh,
         # which we definitely don't want, so read the internal dict directly.
-        for k,v in viewitems(parent._entries):
+        for k,v in parent._entries.items():
             if v is self:
                 self.inodes.invalidate_entry(parent, k)
                 break
@@ -298,26 +293,59 @@ class CollectionDirectoryBase(Directory):
     def on_event(self, event, collection, name, item):
         if collection == self.collection:
             name = self.sanitize_filename(name)
-            _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
-            with llfuse.lock:
-                if event == arvados.collection.ADD:
-                    self.new_entry(name, item, self.mtime())
-                elif event == arvados.collection.DEL:
-                    ent = self._entries[name]
-                    del self._entries[name]
-                    self.inodes.invalidate_entry(self, name)
-                    self.inodes.del_entry(ent)
-                elif event == arvados.collection.MOD:
-                    if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
-                        self.inodes.invalidate_inode(item.fuse_entry)
-                    elif name in self._entries:
-                        self.inodes.invalidate_inode(self._entries[name])
+
+            #
+            # It's possible for another thread to have llfuse.lock and
+            # be waiting on collection.lock.  Meanwhile, we released
+            # llfuse.lock earlier in the stack, but are still holding
+            # on to the collection lock, and now we need to re-acquire
+            # llfuse.lock.  If we don't release the collection lock,
+            # we'll deadlock where we're holding the collection lock
+            # waiting for llfuse.lock and the other thread is holding
+            # llfuse.lock and waiting for the collection lock.
+            #
+            # The correct locking order here is to take llfuse.lock
+            # first, then the collection lock.
+            #
+            # Since collection.lock is an RLock, it might be locked
+            # multiple times, so we need to release it multiple times,
+            # keep a count, then re-lock it the correct number of
+            # times.
+            #
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        if event == arvados.collection.ADD:
+                            self.new_entry(name, item, self.mtime())
+                        elif event == arvados.collection.DEL:
+                            ent = self._entries[name]
+                            del self._entries[name]
+                            self.inodes.invalidate_entry(self, name)
+                            self.inodes.del_entry(ent)
+                        elif event == arvados.collection.MOD:
+                            if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+                                self.inodes.invalidate_inode(item.fuse_entry)
+                            elif name in self._entries:
+                                self.inodes.invalidate_inode(self._entries[name])
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def populate(self, mtime):
         self._mtime = mtime
-        self.collection.subscribe(self.on_event)
-        for entry, item in viewitems(self.collection):
-            self.new_entry(entry, item, self.mtime())
+        with self.collection.lock:
+            self.collection.subscribe(self.on_event)
+            for entry, item in self.collection.items():
+                self.new_entry(entry, item, self.mtime())
 
     def writable(self):
         return self.collection.writable()
@@ -464,6 +492,7 @@ class CollectionDirectory(CollectionDirectoryBase):
                         return
 
                     _logger.debug("Updating collection %s inode %s to record version %s", self.collection_locator, self.inode, to_record_version)
+                    new_collection_record = None
                     if self.collection is not None:
                         if self.collection.known_past_version(to_record_version):
                             _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
@@ -487,13 +516,16 @@ class CollectionDirectory(CollectionDirectoryBase):
                             new_collection_record["portable_data_hash"] = new_collection_record["uuid"]
                         if 'manifest_text' not in new_collection_record:
                             new_collection_record['manifest_text'] = coll_reader.manifest_text()
+                        if 'storage_classes_desired' not in new_collection_record:
+                            new_collection_record['storage_classes_desired'] = coll_reader.storage_classes_desired()
 
-                        if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
-                            self.new_collection(new_collection_record, coll_reader)
-
-                        self._manifest_size = len(coll_reader.manifest_text())
-                        _logger.debug("%s manifest_size %i", self, self._manifest_size)
                 # end with llfuse.lock_released, re-acquire lock
+                if (new_collection_record is not None and
+                    (self.collection_record is None or
+                     self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"))):
+                    self.new_collection(new_collection_record, coll_reader)
+                    self._manifest_size = len(coll_reader.manifest_text())
+                    _logger.debug("%s manifest_size %i", self, self._manifest_size)
 
                 self.fresh()
                 return True
@@ -571,11 +603,12 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
         def save_new(self):
             pass
 
-    def __init__(self, parent_inode, inodes, api_client, num_retries):
+    def __init__(self, parent_inode, inodes, api_client, num_retries, storage_classes=None):
         collection = self.UnsaveableCollection(
             api_client=api_client,
             keep_client=api_client.keep,
-            num_retries=num_retries)
+            num_retries=num_retries,
+            storage_classes_desired=storage_classes)
         super(TmpCollectionDirectory, self).__init__(
             parent_inode, inodes, api_client.config, collection)
         self.collection_record_file = None
@@ -584,10 +617,26 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def on_event(self, *args, **kwargs):
         super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
         if self.collection_record_file:
-            with llfuse.lock:
-                self.collection_record_file.invalidate()
-            self.inodes.invalidate_inode(self.collection_record_file)
-            _logger.debug("%s invalidated collection record", self)
+
+            # See discussion in CollectionDirectoryBase.on_event
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        self.collection_record_file.invalidate()
+                        self.inodes.invalidate_inode(self.collection_record_file)
+                        _logger.debug("%s invalidated collection record", self)
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def collection_record(self):
         with llfuse.lock_released:
@@ -595,6 +644,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
                 "uuid": None,
                 "manifest_text": self.collection.manifest_text(),
                 "portable_data_hash": self.collection.portable_data_hash(),
+                "storage_classes_desired": self.collection.storage_classes_desired(),
             }
 
     def __contains__(self, k):
@@ -653,11 +703,12 @@ and the directory will appear if it exists.
 
 """.lstrip()
 
-    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
+    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False, storage_classes=None):
         super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
+        self.storage_classes = storage_classes
 
     def __setattr__(self, name, value):
         super(MagicDirectory, self).__setattr__(name, value)
@@ -683,11 +734,12 @@ and the directory will appear if it exists.
 
             if group_uuid_pattern.match(k):
                 project = self.api.groups().list(
-                    filters=[['group_class', '=', 'project'], ["uuid", "=", k]]).execute(num_retries=self.num_retries)
+                    filters=[['group_class', 'in', ['project','filter']], ["uuid", "=", k]]).execute(num_retries=self.num_retries)
                 if project[u'items_available'] == 0:
                     return False
                 e = self.inodes.add_entry(ProjectDirectory(
-                    self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
+                    self.inode, self.inodes, self.api, self.num_retries,
+                    project[u'items'][0], storage_classes=self.storage_classes))
             else:
                 e = self.inodes.add_entry(CollectionDirectory(
                         self.inode, self.inodes, self.api, self.num_retries, k))
@@ -811,7 +863,7 @@ class ProjectDirectory(Directory):
     """A special directory that contains the contents of a project."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, project_object,
-                 poll=False, poll_time=60):
+                 poll=True, poll_time=3, storage_classes=None):
         super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
@@ -823,6 +875,7 @@ class ProjectDirectory(Directory):
         self._updating_lock = threading.Lock()
         self._current_user = None
         self._full_listing = False
+        self.storage_classes = storage_classes
 
     def want_event_subscribe(self):
         return True
@@ -831,7 +884,7 @@ class ProjectDirectory(Directory):
         if collection_uuid_pattern.match(i['uuid']):
             return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
         elif group_uuid_pattern.match(i['uuid']):
-            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time)
+            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes)
         elif link_uuid_pattern.match(i['uuid']):
             if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
                 return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
@@ -895,15 +948,17 @@ class ProjectDirectory(Directory):
                     self.project_object = self.api.users().get(
                         uuid=self.project_uuid).execute(num_retries=self.num_retries)
                 # do this in 2 steps until #17424 is fixed
-                contents = arvados.util.list_all(self.api.groups().contents,
-                                                 self.num_retries,
-                                                 uuid=self.project_uuid,
-                                                 filters=[["uuid", "is_a", "arvados#group"],
-                                                          ["group_class", "=", "project"]])
-                contents.extend(arvados.util.list_all(self.api.groups().contents,
-                                                      self.num_retries,
-                                                      uuid=self.project_uuid,
-                                                      filters=[["uuid", "is_a", "arvados#collection"]]))
+                contents = list(arvados.util.keyset_list_all(self.api.groups().contents,
+                                                        order_key="uuid",
+                                                        num_retries=self.num_retries,
+                                                        uuid=self.project_uuid,
+                                                        filters=[["uuid", "is_a", "arvados#group"],
+                                                                 ["groups.group_class", "in", ["project","filter"]]]))
+                contents.extend(arvados.util.keyset_list_all(self.api.groups().contents,
+                                                             order_key="uuid",
+                                                             num_retries=self.num_retries,
+                                                             uuid=self.project_uuid,
+                                                             filters=[["uuid", "is_a", "arvados#collection"]]))
 
             # end with llfuse.lock_released, re-acquire lock
 
@@ -934,7 +989,7 @@ class ProjectDirectory(Directory):
             else:
                 namefilter = ["name", "in", [k, k2]]
             contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid],
-                                                       ["group_class", "=", "project"],
+                                                       ["group_class", "in", ["project","filter"]],
                                                        namefilter],
                                               limit=2).execute(num_retries=self.num_retries)["items"]
             if not contents:
@@ -980,9 +1035,15 @@ class ProjectDirectory(Directory):
     def mkdir(self, name):
         try:
             with llfuse.lock_released:
-                self.api.collections().create(body={"owner_uuid": self.project_uuid,
-                                                    "name": name,
-                                                    "manifest_text": ""}).execute(num_retries=self.num_retries)
+                c = {
+                    "owner_uuid": self.project_uuid,
+                    "name": name,
+                    "manifest_text": "",
+                }
+                # Only send storage_classes_desired when storage classes were configured for this directory.
+                if self.storage_classes is not None:
+                    c["storage_classes_desired"] = self.storage_classes
+                self.api.collections().create(body=c).execute(num_retries=self.num_retries)
             self.invalidate()
         except apiclient_errors.Error as error:
             _logger.error(error)
@@ -1077,7 +1139,7 @@ class SharedDirectory(Directory):
     """A special directory that represents users or groups who have shared projects with me."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, exclude,
-                 poll=False, poll_time=60):
+                 poll=False, poll_time=60, storage_classes=None):
         super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
@@ -1085,6 +1147,7 @@ class SharedDirectory(Directory):
         self._poll = True
         self._poll_time = poll_time
         self._updating_lock = threading.Lock()
+        self.storage_classes = storage_classes
 
     @use_counter
     def update(self):
@@ -1103,7 +1166,7 @@ class SharedDirectory(Directory):
                 if 'httpMethod' in methods.get('shared', {}):
                     page = []
                     while True:
-                        resp = self.api.groups().shared(filters=[['group_class', '=', 'project']]+page,
+                        resp = self.api.groups().shared(filters=[['group_class', 'in', ['project','filter']]]+page,
                                                         order="uuid",
                                                         limit=10000,
                                                         count="none",
@@ -1118,10 +1181,12 @@ class SharedDirectory(Directory):
                             objects[r["uuid"]] = r
                             root_owners.add(r["uuid"])
                 else:
-                    all_projects = arvados.util.list_all(
-                        self.api.groups().list, self.num_retries,
-                        filters=[['group_class','=','project']],
-                        select=["uuid", "owner_uuid"])
+                    all_projects = list(arvados.util.keyset_list_all(
+                        self.api.groups().list,
+                        order_key="uuid",
+                        num_retries=self.num_retries,
+                        filters=[['group_class','in',['project','filter']]],
+                        select=["uuid", "owner_uuid"]))
                     for ob in all_projects:
                         objects[ob['uuid']] = ob
 
@@ -1131,11 +1196,15 @@ class SharedDirectory(Directory):
                             roots.append(ob['uuid'])
                             root_owners.add(ob['owner_uuid'])
 
-                    lusers = arvados.util.list_all(
-                        self.api.users().list, self.num_retries,
+                    lusers = arvados.util.keyset_list_all(
+                        self.api.users().list,
+                        order_key="uuid",
+                        num_retries=self.num_retries,
                         filters=[['uuid','in', list(root_owners)]])
-                    lgroups = arvados.util.list_all(
-                        self.api.groups().list, self.num_retries,
+                    lgroups = arvados.util.keyset_list_all(
+                        self.api.groups().list,
+                        order_key="uuid",
+                        num_retries=self.num_retries,
                         filters=[['uuid','in', list(root_owners)+roots]])
 
                     for l in lusers:
@@ -1148,8 +1217,6 @@ class SharedDirectory(Directory):
                         obr = objects[r]
                         if obr.get("name"):
                             contents[obr["name"]] = obr
-                        #elif obr.get("username"):
-                        #    contents[obr["username"]] = obr
                         elif "first_name" in obr:
                             contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
 
@@ -1161,10 +1228,10 @@ class SharedDirectory(Directory):
 
             # end with llfuse.lock_released, re-acquire lock
 
-            self.merge(viewitems(contents),
+            self.merge(contents.items(),
                        lambda i: i[0],
                        lambda a, i: a.uuid() == i[1]['uuid'],
-                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
         except Exception:
             _logger.exception("arv-mount shared dir error")
         finally: