Merge branch '16039-fuse-forward-slash-sub'
authorTom Clegg <tom@tomclegg.ca>
Fri, 14 Feb 2020 20:12:40 +0000 (15:12 -0500)
committerTom Clegg <tom@tomclegg.ca>
Fri, 14 Feb 2020 20:12:40 +0000 (15:12 -0500)
fixes #16039

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

build/run-tests.sh
sdk/cwl/setup.py
sdk/pam/setup.py
sdk/python/arvados/api.py
sdk/python/arvados/util.py
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/command.py
services/fuse/arvados_fuse/fusedir.py
services/fuse/tests/test_mount.py
services/nodemanager/setup.py
tools/crunchstat-summary/setup.py

index f21861762db80b40535c6d0522a379b12dadca1b..c4c5335596a569d54f2a25c7414240baa9e2c4af 100755 (executable)
@@ -647,8 +647,8 @@ install_env() {
     . "$VENVDIR/bin/activate"
 
     # Needed for run_test_server.py which is used by certain (non-Python) tests.
-    pip install --no-cache-dir PyYAML future \
-        || fatal "pip install PyYAML failed"
+    pip install --no-cache-dir PyYAML future httplib2 \
+        || fatal "`pip install PyYAML future httplib2` failed"
 
     # Preinstall libcloud if using a fork; otherwise nodemanager "pip
     # install" won't pick it up by default.
@@ -1099,6 +1099,7 @@ install_deps() {
     do_install sdk/cli
     do_install sdk/perl
     do_install sdk/python pip
+    do_install sdk/python pip3
     do_install sdk/ruby
     do_install services/api
     do_install services/arv-git-httpd go
index 62ceab2fa11c1895983cd518fb2d4370a49da986..d4bb6d102a935780e13921fe546119244c0364e1 100644 (file)
@@ -36,7 +36,8 @@ setup(name='arvados-cwl-runner',
           'bin/arvados-cwl-runner',
       ],
       # Note that arvados/build/run-build-packages.sh looks at this
-      # file to determine what version of cwltool and schema-salad to build.
+      # file to determine what version of cwltool and schema-salad to
+      # build.
       install_requires=[
           'cwltool==1.0.20190831161204',
           'schema-salad==4.5.20190815125611',
@@ -63,5 +64,5 @@ setup(name='arvados-cwl-runner',
           'mock>=1.0,<4',
           'subprocess32>=3.5.1',
       ],
-      zip_safe=True
-      )
+      zip_safe=True,
+)
index af00142a04756400491d52db4c9197f7ae094cdd..59b49a19fe4876562175c09adc6939c52bdb86b5 100755 (executable)
@@ -53,5 +53,5 @@ setup(name='arvados-pam',
       ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'],
-      zip_safe=False
-      )
+      zip_safe=False,
+)
index b18ce25fd218201ab75f4b3c14f9c7b66f84f373..ae687c50bd98b62746afae0b3d381c59e5e42bd7 100644 (file)
@@ -237,6 +237,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False,
     svc.api_token = token
     svc.insecure = insecure
     svc.request_id = request_id
+    svc.config = lambda: util.get_config_once(svc)
     kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0)
     kwargs['http'].cache = None
     kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id()
index fd29a3dc1d46938d2ea9e5c661bea2cbf20659be..9e0a3178305068c4edec716c57467221f98af562 100644 (file)
@@ -419,3 +419,11 @@ def new_request_id():
             rid += chr(c+ord('a')-10)
         n = n // 36
     return rid
+
+def get_config_once(svc):
+    if not svc._rootDesc.get('resources')['configs']:
+        # Old API server version, no config export endpoint
+        return {}
+    if not hasattr(svc, '_cached_config'):
+        svc._cached_config = svc.configs().get().execute()
+    return svc._cached_config
index 0944a318750d995055e970d445756df94e981382..3a0316cf9e1e48cb319792c1636c56c8fc1eabc2 100644 (file)
@@ -98,7 +98,7 @@ else:
 
 LLFUSE_VERSION_0 = llfuse.__version__.startswith('0')
 
-from .fusedir import sanitize_filename, Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
+from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
 from .fusefile import StringFile, FuseArvadosFile
 
 _logger = logging.getLogger('arvados.arvados_fuse')
index 5283367532b6e78956bf0721b4267c8eaea4afe3..7bef8a269fd5a2aec7dcd93f272e5a0a5bd99d19 100644 (file)
@@ -301,7 +301,7 @@ class Mount(object):
             return
 
         e = self.operations.inodes.add_entry(Directory(
-            llfuse.ROOT_INODE, self.operations.inodes))
+            llfuse.ROOT_INODE, self.operations.inodes, self.api.config))
         dir_args[0] = e.inode
 
         for name in self.args.mount_by_id:
index 328765744128d180ba30854854f71b2340ee1cc0..8b12f73e895a8d59e3f95461218ab7cf14589887 100644 (file)
@@ -33,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse')
 # appear as underscores in the fuse mount.)
 _disallowed_filename_characters = re.compile('[\x00/]')
 
-# '.' and '..' are not reachable if API server is newer than #6277
-def sanitize_filename(dirty):
-    """Replace disallowed filename characters with harmless "_"."""
-    if dirty is None:
-        return None
-    elif dirty == '':
-        return '_'
-    elif dirty == '.':
-        return '_'
-    elif dirty == '..':
-        return '__'
-    else:
-        return _disallowed_filename_characters.sub('_', dirty)
-
 
 class Directory(FreshBase):
     """Generic directory object, backed by a dict.
@@ -55,7 +41,7 @@ class Directory(FreshBase):
     and the value referencing a File or Directory object.
     """
 
-    def __init__(self, parent_inode, inodes):
+    def __init__(self, parent_inode, inodes, apiconfig):
         """parent_inode is the integer inode number"""
 
         super(Directory, self).__init__()
@@ -65,11 +51,53 @@ class Directory(FreshBase):
             raise Exception("parent_inode should be an int")
         self.parent_inode = parent_inode
         self.inodes = inodes
+        self.apiconfig = apiconfig
         self._entries = {}
         self._mtime = time.time()
 
-    #  Overriden by subclasses to implement logic to update the entries dict
-    #  when the directory is stale
+    def forward_slash_subst(self):
+        if not hasattr(self, '_fsns'):
+            self._fsns = None
+            config = self.apiconfig()
+            try:
+                self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
+            except KeyError:
+                # old API server with no FSNS config
+                self._fsns = '_'
+            else:
+                if self._fsns == '' or self._fsns == '/':
+                    self._fsns = None
+        return self._fsns
+
+    def unsanitize_filename(self, incoming):
+        """Replace ForwardSlashNameSubstitution value with /"""
+        fsns = self.forward_slash_subst()
+        if isinstance(fsns, str):
+            return incoming.replace(fsns, '/')
+        else:
+            return incoming
+
+    def sanitize_filename(self, dirty):
+        """Replace disallowed filename characters according to
+        ForwardSlashNameSubstitution in self.api_config."""
+        # '.' and '..' are not reachable if API server is newer than #6277
+        if dirty is None:
+            return None
+        elif dirty == '':
+            return '_'
+        elif dirty == '.':
+            return '_'
+        elif dirty == '..':
+            return '__'
+        else:
+            fsns = self.forward_slash_subst()
+            if isinstance(fsns, str):
+                dirty = dirty.replace('/', fsns)
+            return _disallowed_filename_characters.sub('_', dirty)
+
+
+    #  Overridden by subclasses to implement logic to update the
+    #  entries dict when the directory is stale
     @use_counter
     def update(self):
         pass
@@ -138,7 +166,7 @@ class Directory(FreshBase):
         self._entries = {}
         changed = False
         for i in items:
-            name = sanitize_filename(fn(i))
+            name = self.sanitize_filename(fn(i))
             if name:
                 if name in oldentries and same(oldentries[name], i):
                     # move existing directory entry over
@@ -246,12 +274,13 @@ class CollectionDirectoryBase(Directory):
 
     """
 
-    def __init__(self, parent_inode, inodes, collection):
-        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+    def __init__(self, parent_inode, inodes, apiconfig, collection):
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig)
+        self.apiconfig = apiconfig
         self.collection = collection
 
     def new_entry(self, name, item, mtime):
-        name = sanitize_filename(name)
+        name = self.sanitize_filename(name)
         if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
             if item.fuse_entry.dead is not True:
                 raise Exception("Can only reparent dead inode entry")
@@ -260,7 +289,7 @@ class CollectionDirectoryBase(Directory):
             item.fuse_entry.dead = False
             self._entries[name] = item.fuse_entry
         elif isinstance(item, arvados.collection.RichCollectionBase):
-            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item))
             self._entries[name].populate(mtime)
         else:
             self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
@@ -268,7 +297,7 @@ class CollectionDirectoryBase(Directory):
 
     def on_event(self, event, collection, name, item):
         if collection == self.collection:
-            name = sanitize_filename(name)
+            name = self.sanitize_filename(name)
             _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
             with llfuse.lock:
                 if event == arvados.collection.ADD:
@@ -357,7 +386,7 @@ class CollectionDirectory(CollectionDirectoryBase):
     """Represents the root of a directory tree representing a collection."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
-        super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None)
         self.api = api
         self.num_retries = num_retries
         self.collection_record_file = None
@@ -548,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
             keep_client=api_client.keep,
             num_retries=num_retries)
         super(TmpCollectionDirectory, self).__init__(
-            parent_inode, inodes, collection)
+            parent_inode, inodes, api_client.config, collection)
         self.collection_record_file = None
         self.populate(self.mtime())
 
@@ -625,7 +654,7 @@ and the directory will appear if it exists.
 """.lstrip()
 
     def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
-        super(MagicDirectory, self).__init__(parent_inode, inodes)
+        super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
@@ -660,6 +689,7 @@ and the directory will appear if it exists.
                 e = self.inodes.add_entry(ProjectDirectory(
                     self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
             else:
+                import sys
                 e = self.inodes.add_entry(CollectionDirectory(
                         self.inode, self.inodes, self.api, self.num_retries, k))
 
@@ -696,7 +726,7 @@ class TagsDirectory(Directory):
     """A special directory that contains as subdirectories all tags visible to the user."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
-        super(TagsDirectory, self).__init__(parent_inode, inodes)
+        super(TagsDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self._poll = True
@@ -753,7 +783,7 @@ class TagDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, tag,
                  poll=False, poll_time=60):
-        super(TagDirectory, self).__init__(parent_inode, inodes)
+        super(TagDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.tag = tag
@@ -783,7 +813,7 @@ class ProjectDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, project_object,
                  poll=False, poll_time=60):
-        super(ProjectDirectory, self).__init__(parent_inode, inodes)
+        super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.project_object = project_object
@@ -897,16 +927,25 @@ class ProjectDirectory(Directory):
         elif self._full_listing or super(ProjectDirectory, self).__contains__(k):
             return super(ProjectDirectory, self).__getitem__(k)
         with llfuse.lock_released:
+            k2 = self.unsanitize_filename(k)
+            if k2 == k:
+                namefilter = ["name", "=", k]
+            else:
+                namefilter = ["name", "in", [k, k2]]
             contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid],
                                                        ["group_class", "=", "project"],
-                                                       ["name", "=", k]],
-                                              limit=1).execute(num_retries=self.num_retries)["items"]
+                                                       namefilter],
+                                              limit=2).execute(num_retries=self.num_retries)["items"]
             if not contents:
                 contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid],
-                                                                ["name", "=", k]],
-                                                       limit=1).execute(num_retries=self.num_retries)["items"]
+                                                                namefilter],
+                                                       limit=2).execute(num_retries=self.num_retries)["items"]
         if contents:
-            name = sanitize_filename(self.namefn(contents[0]))
+            if len(contents) > 1 and contents[1]['name'] == k:
+                # If "foo/bar" and "foo[SUBST]bar" both exist, use
+                # "foo[SUBST]bar".
+                contents = [contents[1]]
+            name = self.sanitize_filename(self.namefn(contents[0]))
             if name != k:
                 raise KeyError(k)
             return self._add_entry(contents[0], name)
@@ -995,8 +1034,8 @@ class ProjectDirectory(Directory):
         new_attrs = properties.get("new_attributes") or {}
         old_attrs["uuid"] = ev["object_uuid"]
         new_attrs["uuid"] = ev["object_uuid"]
-        old_name = sanitize_filename(self.namefn(old_attrs))
-        new_name = sanitize_filename(self.namefn(new_attrs))
+        old_name = self.sanitize_filename(self.namefn(old_attrs))
+        new_name = self.sanitize_filename(self.namefn(new_attrs))
 
         # create events will have a new name, but not an old name
         # delete events will have an old name, but not a new name
@@ -1038,7 +1077,7 @@ class SharedDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, exclude,
                  poll=False, poll_time=60):
-        super(SharedDirectory, self).__init__(parent_inode, inodes)
+        super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.current_user = api.users().current().execute(num_retries=num_retries)
index f539b3f7d055d9e8753fe85e917777adca00dc4c..593d945cff0be54e46cb360712efd20b28e1658d 100644 (file)
@@ -20,6 +20,7 @@ import arvados
 import arvados_fuse as fuse
 from . import run_test_server
 
+from .integration_test import IntegrationTest
 from .mount_test_base import MountTestBase
 
 logger = logging.getLogger('arvados.arv-mount')
@@ -1098,8 +1099,9 @@ class MagicDirApiError(FuseMagicTest):
             llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
 
 
-class FuseUnitTest(unittest.TestCase):
+class SanitizeFilenameTest(MountTestBase):
     def test_sanitize_filename(self):
+        pdir = fuse.ProjectDirectory(1, {}, self.api, 0, project_object=self.api.users().current().execute())
         acceptable = [
             "foo.txt",
             ".foo",
@@ -1119,15 +1121,15 @@ class FuseUnitTest(unittest.TestCase):
             "//",
             ]
         for f in acceptable:
-            self.assertEqual(f, fuse.sanitize_filename(f))
+            self.assertEqual(f, pdir.sanitize_filename(f))
         for f in unacceptable:
-            self.assertNotEqual(f, fuse.sanitize_filename(f))
+            self.assertNotEqual(f, pdir.sanitize_filename(f))
             # The sanitized filename should be the same length, though.
-            self.assertEqual(len(f), len(fuse.sanitize_filename(f)))
+            self.assertEqual(len(f), len(pdir.sanitize_filename(f)))
         # Special cases
-        self.assertEqual("_", fuse.sanitize_filename(""))
-        self.assertEqual("_", fuse.sanitize_filename("."))
-        self.assertEqual("__", fuse.sanitize_filename(".."))
+        self.assertEqual("_", pdir.sanitize_filename(""))
+        self.assertEqual("_", pdir.sanitize_filename("."))
+        self.assertEqual("__", pdir.sanitize_filename(".."))
 
 
 class FuseMagicTestPDHOnly(MountTestBase):
@@ -1191,3 +1193,63 @@ class FuseMagicTestPDHOnly(MountTestBase):
 
     def test_with_default_by_id(self):
         self.verify_pdh_only(skip_pdh_only=True)
+
+
+class SlashSubstitutionTest(IntegrationTest):
+    mnt_args = [
+        '--read-write',
+        '--mount-home', 'zzz',
+    ]
+
+    def setUp(self):
+        super(SlashSubstitutionTest, self).setUp()
+        self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+        self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+        self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute()
+        self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute()
+        self.fusename = 'foo[SLASH]bar[SLASH]baz'
+
+    @IntegrationTest.mount(argv=mnt_args)
+    @mock.patch('arvados.util.get_config_once')
+    def test_slash_substitution_before_listing(self, get_config_once):
+        get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+        self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+        self.checkContents()
+    @staticmethod
+    def _test_slash_substitution_before_listing(self, tmpdir, fusename):
+        with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f:
+            f.write('xxx')
+        with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+            f.write('foo')
+
+    @IntegrationTest.mount(argv=mnt_args)
+    @mock.patch('arvados.util.get_config_once')
+    def test_slash_substitution_after_listing(self, get_config_once):
+        get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+        self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+        self.checkContents()
+    @staticmethod
+    def _test_slash_substitution_after_listing(self, tmpdir, fusename):
+        with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f:
+            f.write('xxx')
+        os.listdir(tmpdir)
+        with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+            f.write('foo')
+
+    def checkContents(self):
+        self.assertRegexpMatches(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo)
+        self.assertRegexpMatches(self.api.collections().get(uuid=self.testcolleasy['uuid']).execute()['manifest_text'], ' f561aaf6') # md5(xxx)
+
+    @IntegrationTest.mount(argv=mnt_args)
+    @mock.patch('arvados.util.get_config_once')
+    def test_slash_substitution_conflict(self, get_config_once):
+        self.testcollconflict = self.api.collections().create(body={"name": self.fusename}).execute()
+        get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+        self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+        self.assertRegexpMatches(self.api.collections().get(uuid=self.testcollconflict['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo)
+        # foo/bar/baz collection unchanged, because it is masked by foo[SLASH]bar[SLASH]baz
+        self.assertEqual(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], '')
+    @staticmethod
+    def _test_slash_substitution_conflict(self, tmpdir, fusename):
+        with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+            f.write('foo')
index a2b9a0ca92458a5c3b7b2b4af414b5451b922b75..75e8f85fbd1b3bfb4fba3de66ec9d341e06fb5ed 100644 (file)
@@ -56,5 +56,5 @@ setup(name='arvados-node-manager',
           'apache-libcloud==2.5.0',
           'subprocess32>=3.5.1',
       ],
-      zip_safe=False
-      )
+      zip_safe=False,
+)
index 40c5a2f9a31be5922bba781b4b530d3b7363125f..557b6d3f4e688c263706b3b5036c52ccf98d6821 100755 (executable)
@@ -42,5 +42,5 @@ setup(name='crunchstat_summary',
       ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0'],
-      zip_safe=False
-      )
+      zip_safe=False,
+)