17119: make arvados_fuse.fusedir.ProjectDirectory use the group contents endpoint.
[arvados.git] / services / fuse / arvados_fuse / fusedir.py
index 34fd594be2dd6ce2793eebe1ac884c92aa8fdd58..1fab2e0fb89d22d4362dd56d017e55519ead6f09 100644 (file)
@@ -2,20 +2,25 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-import logging
-import re
-import time
-import llfuse
-import arvados
+from __future__ import absolute_import
+from __future__ import division
+from future.utils import viewitems
+from future.utils import itervalues
+from builtins import dict
 import apiclient
+import arvados
+import errno
 import functools
+import llfuse
+import logging
+import re
+import sys
 import threading
-from apiclient import errors as apiclient_errors
-import errno
 import time
+from apiclient import errors as apiclient_errors
 
-from fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile
-from fresh import FreshBase, convertTime, use_counter, check_update
+from .fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile
+from .fresh import FreshBase, convertTime, use_counter, check_update
 
 import arvados.collection
 from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
@@ -28,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse')
 # appear as underscores in the fuse mount.)
 _disallowed_filename_characters = re.compile('[\x00/]')
 
-# '.' and '..' are not reachable if API server is newer than #6277
-def sanitize_filename(dirty):
-    """Replace disallowed filename characters with harmless "_"."""
-    if dirty is None:
-        return None
-    elif dirty == '':
-        return '_'
-    elif dirty == '.':
-        return '_'
-    elif dirty == '..':
-        return '__'
-    else:
-        return _disallowed_filename_characters.sub('_', dirty)
-
 
 class Directory(FreshBase):
     """Generic directory object, backed by a dict.
@@ -50,7 +41,7 @@ class Directory(FreshBase):
     and the value referencing a File or Directory object.
     """
 
-    def __init__(self, parent_inode, inodes):
+    def __init__(self, parent_inode, inodes, apiconfig):
         """parent_inode is the integer inode number"""
 
         super(Directory, self).__init__()
@@ -60,11 +51,53 @@ class Directory(FreshBase):
             raise Exception("parent_inode should be an int")
         self.parent_inode = parent_inode
         self.inodes = inodes
+        self.apiconfig = apiconfig
         self._entries = {}
         self._mtime = time.time()
 
-    #  Overriden by subclasses to implement logic to update the entries dict
-    #  when the directory is stale
+    def forward_slash_subst(self):
+        if not hasattr(self, '_fsns'):
+            self._fsns = None
+            config = self.apiconfig()
+            try:
+                self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
+            except KeyError:
+                # old API server with no FSNS config
+                self._fsns = '_'
+            else:
+                if self._fsns == '' or self._fsns == '/':
+                    self._fsns = None
+        return self._fsns
+
+    def unsanitize_filename(self, incoming):
+        """Replace ForwardSlashNameSubstitution value with /"""
+        fsns = self.forward_slash_subst()
+        if isinstance(fsns, str):
+            return incoming.replace(fsns, '/')
+        else:
+            return incoming
+
+    def sanitize_filename(self, dirty):
+        """Replace disallowed filename characters according to
+        ForwardSlashNameSubstitution in self.api_config."""
+        # '.' and '..' are not reachable if API server is newer than #6277
+        if dirty is None:
+            return None
+        elif dirty == '':
+            return '_'
+        elif dirty == '.':
+            return '_'
+        elif dirty == '..':
+            return '__'
+        else:
+            fsns = self.forward_slash_subst()
+            if isinstance(fsns, str):
+                dirty = dirty.replace('/', fsns)
+            return _disallowed_filename_characters.sub('_', dirty)
+
+
+    #  Overridden by subclasses to implement logic to update the
+    #  entries dict when the directory is stale
     @use_counter
     def update(self):
         pass
@@ -133,7 +166,7 @@ class Directory(FreshBase):
         self._entries = {}
         changed = False
         for i in items:
-            name = sanitize_filename(fn(i))
+            name = self.sanitize_filename(fn(i))
             if name:
                 if name in oldentries and same(oldentries[name], i):
                     # move existing directory entry over
@@ -163,7 +196,7 @@ class Directory(FreshBase):
     def in_use(self):
         if super(Directory, self).in_use():
             return True
-        for v in self._entries.itervalues():
+        for v in itervalues(self._entries):
             if v.in_use():
                 return True
         return False
@@ -171,7 +204,7 @@ class Directory(FreshBase):
     def has_ref(self, only_children):
         if super(Directory, self).has_ref(only_children):
             return True
-        for v in self._entries.itervalues():
+        for v in itervalues(self._entries):
             if v.has_ref(False):
                 return True
         return False
@@ -193,7 +226,7 @@ class Directory(FreshBase):
         # Find self on the parent in order to invalidate this path.
         # Calling the public items() method might trigger a refresh,
         # which we definitely don't want, so read the internal dict directly.
-        for k,v in parent._entries.items():
+        for k,v in viewitems(parent._entries):
             if v is self:
                 self.inodes.invalidate_entry(parent, k)
                 break
@@ -241,12 +274,13 @@ class CollectionDirectoryBase(Directory):
 
     """
 
-    def __init__(self, parent_inode, inodes, collection):
-        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+    def __init__(self, parent_inode, inodes, apiconfig, collection):
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig)
+        self.apiconfig = apiconfig
         self.collection = collection
 
     def new_entry(self, name, item, mtime):
-        name = sanitize_filename(name)
+        name = self.sanitize_filename(name)
         if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
             if item.fuse_entry.dead is not True:
                 raise Exception("Can only reparent dead inode entry")
@@ -255,7 +289,7 @@ class CollectionDirectoryBase(Directory):
             item.fuse_entry.dead = False
             self._entries[name] = item.fuse_entry
         elif isinstance(item, arvados.collection.RichCollectionBase):
-            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item))
             self._entries[name].populate(mtime)
         else:
             self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
@@ -263,7 +297,7 @@ class CollectionDirectoryBase(Directory):
 
     def on_event(self, event, collection, name, item):
         if collection == self.collection:
-            name = sanitize_filename(name)
+            name = self.sanitize_filename(name)
             _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
             with llfuse.lock:
                 if event == arvados.collection.ADD:
@@ -282,7 +316,7 @@ class CollectionDirectoryBase(Directory):
     def populate(self, mtime):
         self._mtime = mtime
         self.collection.subscribe(self.on_event)
-        for entry, item in self.collection.items():
+        for entry, item in viewitems(self.collection):
             self.new_entry(entry, item, self.mtime())
 
     def writable(self):
@@ -352,14 +386,14 @@ class CollectionDirectory(CollectionDirectoryBase):
     """Represents the root of a directory tree representing a collection."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
-        super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None)
         self.api = api
         self.num_retries = num_retries
         self.collection_record_file = None
         self.collection_record = None
         self._poll = True
         try:
-            self._poll_time = (api._rootDesc.get('blobSignatureTtl', 60*60*2)/2)
+            self._poll_time = (api._rootDesc.get('blobSignatureTtl', 60*60*2) // 2)
         except:
             _logger.debug("Error getting blobSignatureTtl from discovery document: %s", sys.exc_info()[0])
             self._poll_time = 60*60
@@ -543,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
             keep_client=api_client.keep,
             num_retries=num_retries)
         super(TmpCollectionDirectory, self).__init__(
-            parent_inode, inodes, collection)
+            parent_inode, inodes, api_client.config, collection)
         self.collection_record_file = None
         self.populate(self.mtime())
 
@@ -609,17 +643,18 @@ class MagicDirectory(Directory):
     README_TEXT = """
 This directory provides access to Arvados collections as subdirectories listed
 by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
-the form '1234567890abcdef0123456789abcdef+123').
+the form '1234567890abcdef0123456789abcdef+123'), and Arvados projects by uuid
+(in the form 'zzzzz-j7d0g-1234567890abcde').
 
 Note that this directory will appear empty until you attempt to access a
-specific collection subdirectory (such as trying to 'cd' into it), at which
-point the collection will actually be looked up on the server and the directory
-will appear if it exists.
+specific collection or project subdirectory (such as trying to 'cd' into it),
+at which point the collection or project will actually be looked up on the server
+and the directory will appear if it exists.
 
 """.lstrip()
 
     def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
-        super(MagicDirectory, self).__init__(parent_inode, inodes)
+        super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
@@ -645,8 +680,17 @@ will appear if it exists.
 
         try:
             e = None
-            e = self.inodes.add_entry(CollectionDirectory(
-                    self.inode, self.inodes, self.api, self.num_retries, k))
+
+            if group_uuid_pattern.match(k):
+                project = self.api.groups().list(
+                    filters=[['group_class', '=', 'project'], ["uuid", "=", k]]).execute(num_retries=self.num_retries)
+                if project[u'items_available'] == 0:
+                    return False
+                e = self.inodes.add_entry(ProjectDirectory(
+                    self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
+            else:
+                e = self.inodes.add_entry(CollectionDirectory(
+                        self.inode, self.inodes, self.api, self.num_retries, k))
 
             if e.update():
                 if k not in self._entries:
@@ -681,7 +725,7 @@ class TagsDirectory(Directory):
     """A special directory that contains as subdirectories all tags visible to the user."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
-        super(TagsDirectory, self).__init__(parent_inode, inodes)
+        super(TagsDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self._poll = True
@@ -738,7 +782,7 @@ class TagDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, tag,
                  poll=False, poll_time=60):
-        super(TagDirectory, self).__init__(parent_inode, inodes)
+        super(TagDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.tag = tag
@@ -768,7 +812,7 @@ class ProjectDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, project_object,
                  poll=False, poll_time=60):
-        super(ProjectDirectory, self).__init__(parent_inode, inodes)
+        super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.project_object = project_object
@@ -829,7 +873,7 @@ class ProjectDirectory(Directory):
             self.inodes.add_entry(self.project_object_file)
 
         if not self._full_listing:
-            return
+            return True
 
         def samefn(a, i):
             if isinstance(a, CollectionDirectory) or isinstance(a, ProjectDirectory):
@@ -850,14 +894,16 @@ class ProjectDirectory(Directory):
                 elif user_uuid_pattern.match(self.project_uuid):
                     self.project_object = self.api.users().get(
                         uuid=self.project_uuid).execute(num_retries=self.num_retries)
-
-                contents = arvados.util.list_all(self.api.groups().list,
+                # do this in 2 steps until #17424 is fixed
+                contents = arvados.util.list_all(self.api.groups().contents,
                                                  self.num_retries,
-                                                 filters=[["owner_uuid", "=", self.project_uuid],
+                                                 uuid=self.project_uuid,
+                                                 filters=[["uuid", "is_a", "arvados#group"],
                                                           ["group_class", "=", "project"]])
-                contents.extend(arvados.util.list_all(self.api.collections().list,
+                contents.extend(arvados.util.list_all(self.api.groups().contents,
                                                       self.num_retries,
-                                                      filters=[["owner_uuid", "=", self.project_uuid]]))
+                                                      uuid=self.project_uuid,
+                                                      filters=[["uuid", "is_a", "arvados#collection"]]))
 
             # end with llfuse.lock_released, re-acquire lock
 
@@ -865,6 +911,7 @@ class ProjectDirectory(Directory):
                        self.namefn,
                        samefn,
                        self.createDirectory)
+            return True
         finally:
             self._updating_lock.release()
 
@@ -881,16 +928,25 @@ class ProjectDirectory(Directory):
         elif self._full_listing or super(ProjectDirectory, self).__contains__(k):
             return super(ProjectDirectory, self).__getitem__(k)
         with llfuse.lock_released:
+            k2 = self.unsanitize_filename(k)
+            if k2 == k:
+                namefilter = ["name", "=", k]
+            else:
+                namefilter = ["name", "in", [k, k2]]
             contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid],
                                                        ["group_class", "=", "project"],
-                                                       ["name", "=", k]],
-                                              limit=1).execute(num_retries=self.num_retries)["items"]
+                                                       namefilter],
+                                              limit=2).execute(num_retries=self.num_retries)["items"]
             if not contents:
                 contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid],
-                                                                ["name", "=", k]],
-                                                       limit=1).execute(num_retries=self.num_retries)["items"]
+                                                                namefilter],
+                                                       limit=2).execute(num_retries=self.num_retries)["items"]
         if contents:
-            name = sanitize_filename(self.namefn(contents[0]))
+            if len(contents) > 1 and contents[1]['name'] == k:
+                # If "foo/bar" and "foo[SUBST]bar" both exist, use
+                # "foo[SUBST]bar".
+                contents = [contents[1]]
+            name = self.sanitize_filename(self.namefn(contents[0]))
             if name != k:
                 raise KeyError(k)
             return self._add_entry(contents[0], name)
@@ -914,7 +970,7 @@ class ProjectDirectory(Directory):
         with llfuse.lock_released:
             if not self._current_user:
                 self._current_user = self.api.users().current().execute(num_retries=self.num_retries)
-            return self._current_user["uuid"] in self.project_object["writable_by"]
+            return self._current_user["uuid"] in self.project_object.get("writable_by", [])
 
     def persisted(self):
         return True
@@ -979,8 +1035,8 @@ class ProjectDirectory(Directory):
         new_attrs = properties.get("new_attributes") or {}
         old_attrs["uuid"] = ev["object_uuid"]
         new_attrs["uuid"] = ev["object_uuid"]
-        old_name = sanitize_filename(self.namefn(old_attrs))
-        new_name = sanitize_filename(self.namefn(new_attrs))
+        old_name = self.sanitize_filename(self.namefn(old_attrs))
+        new_name = self.sanitize_filename(self.namefn(new_attrs))
 
         # create events will have a new name, but not an old name
         # delete events will have an old name, but not a new name
@@ -1022,7 +1078,7 @@ class SharedDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, exclude,
                  poll=False, poll_time=60):
-        super(SharedDirectory, self).__init__(parent_inode, inodes)
+        super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.current_user = api.users().current().execute(num_retries=num_retries)
@@ -1038,35 +1094,55 @@ class SharedDirectory(Directory):
                 if not self.stale():
                     return
 
-                all_projects = arvados.util.list_all(
-                    self.api.groups().list, self.num_retries,
-                    filters=[['group_class','=','project']],
-                    select=["uuid", "owner_uuid"])
-                objects = {}
-                for ob in all_projects:
-                    objects[ob['uuid']] = ob
-
+                contents = {}
                 roots = []
                 root_owners = set()
-                current_uuid = self.current_user['uuid']
-                for ob in all_projects:
-                    if ob['owner_uuid'] != current_uuid and ob['owner_uuid'] not in objects:
-                        roots.append(ob['uuid'])
-                        root_owners.add(ob['owner_uuid'])
-
-                lusers = arvados.util.list_all(
-                    self.api.users().list, self.num_retries,
-                    filters=[['uuid','in', list(root_owners)]])
-                lgroups = arvados.util.list_all(
-                    self.api.groups().list, self.num_retries,
-                    filters=[['uuid','in', list(root_owners)+roots]])
-
-                for l in lusers:
-                    objects[l["uuid"]] = l
-                for l in lgroups:
-                    objects[l["uuid"]] = l
+                objects = {}
+
+                methods = self.api._rootDesc.get('resources')["groups"]['methods']
+                if 'httpMethod' in methods.get('shared', {}):
+                    page = []
+                    while True:
+                        resp = self.api.groups().shared(filters=[['group_class', '=', 'project']]+page,
+                                                        order="uuid",
+                                                        limit=10000,
+                                                        count="none",
+                                                        include="owner_uuid").execute()
+                        if not resp["items"]:
+                            break
+                        page = [["uuid", ">", resp["items"][len(resp["items"])-1]["uuid"]]]
+                        for r in resp["items"]:
+                            objects[r["uuid"]] = r
+                            roots.append(r["uuid"])
+                        for r in resp["included"]:
+                            objects[r["uuid"]] = r
+                            root_owners.add(r["uuid"])
+                else:
+                    all_projects = arvados.util.list_all(
+                        self.api.groups().list, self.num_retries,
+                        filters=[['group_class','=','project']],
+                        select=["uuid", "owner_uuid"])
+                    for ob in all_projects:
+                        objects[ob['uuid']] = ob
+
+                    current_uuid = self.current_user['uuid']
+                    for ob in all_projects:
+                        if ob['owner_uuid'] != current_uuid and ob['owner_uuid'] not in objects:
+                            roots.append(ob['uuid'])
+                            root_owners.add(ob['owner_uuid'])
+
+                    lusers = arvados.util.list_all(
+                        self.api.users().list, self.num_retries,
+                        filters=[['uuid','in', list(root_owners)]])
+                    lgroups = arvados.util.list_all(
+                        self.api.groups().list, self.num_retries,
+                        filters=[['uuid','in', list(root_owners)+roots]])
+
+                    for l in lusers:
+                        objects[l["uuid"]] = l
+                    for l in lgroups:
+                        objects[l["uuid"]] = l
 
-                contents = {}
                 for r in root_owners:
                     if r in objects:
                         obr = objects[r]
@@ -1085,7 +1161,7 @@ class SharedDirectory(Directory):
 
             # end with llfuse.lock_released, re-acquire lock
 
-            self.merge(contents.items(),
+            self.merge(viewitems(contents),
                        lambda i: i[0],
                        lambda a, i: a.uuid() == i[1]['uuid'],
                        lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))