From: Tom Clegg Date: Fri, 14 Feb 2020 20:12:40 +0000 (-0500) Subject: Merge branch '16039-fuse-forward-slash-sub' X-Git-Tag: 2.1.0~298 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/48c38895200cdafaaeca37299bf8352878389a77?hp=166f27869b2f530aa3dd05b02cdc46f787dd8216 Merge branch '16039-fuse-forward-slash-sub' fixes #16039 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/build/run-tests.sh b/build/run-tests.sh index f21861762d..c4c5335596 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -647,8 +647,8 @@ install_env() { . "$VENVDIR/bin/activate" # Needed for run_test_server.py which is used by certain (non-Python) tests. - pip install --no-cache-dir PyYAML future \ - || fatal "pip install PyYAML failed" + pip install --no-cache-dir PyYAML future httplib2 \ + || fatal "`pip install PyYAML future httplib2` failed" # Preinstall libcloud if using a fork; otherwise nodemanager "pip # install" won't pick it up by default. @@ -1099,6 +1099,7 @@ install_deps() { do_install sdk/cli do_install sdk/perl do_install sdk/python pip + do_install sdk/python pip3 do_install sdk/ruby do_install services/api do_install services/arv-git-httpd go diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index 62ceab2fa1..d4bb6d102a 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -36,7 +36,8 @@ setup(name='arvados-cwl-runner', 'bin/arvados-cwl-runner', ], # Note that arvados/build/run-build-packages.sh looks at this - # file to determine what version of cwltool and schema-salad to build. + # file to determine what version of cwltool and schema-salad to + # build. install_requires=[ 'cwltool==1.0.20190831161204', 'schema-salad==4.5.20190815125611', @@ -63,5 +64,5 @@ setup(name='arvados-cwl-runner', 'mock>=1.0,<4', 'subprocess32>=3.5.1', ], - zip_safe=True - ) + zip_safe=True, +) diff --git a/sdk/pam/setup.py b/sdk/pam/setup.py index af00142a04..59b49a19fe 100755 --- a/sdk/pam/setup.py +++ b/sdk/pam/setup.py @@ -53,5 +53,5 @@ setup(name='arvados-pam', ], test_suite='tests', tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'], - zip_safe=False - ) + zip_safe=False, +) diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index b18ce25fd2..ae687c50bd 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -237,6 +237,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, svc.api_token = token svc.insecure = insecure svc.request_id = request_id + svc.config = lambda: util.get_config_once(svc) kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0) kwargs['http'].cache = None kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id() diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py index fd29a3dc1d..9e0a317830 100644 --- a/sdk/python/arvados/util.py +++ b/sdk/python/arvados/util.py @@ -419,3 +419,11 @@ def new_request_id(): rid += chr(c+ord('a')-10) n = n // 36 return rid + +def get_config_once(svc): + if not svc._rootDesc.get('resources')['configs']: + # Old API server version, no config export endpoint + return {} + if not hasattr(svc, '_cached_config'): + svc._cached_config = svc.configs().get().execute() + return svc._cached_config diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index 0944a31875..3a0316cf9e 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -98,7 +98,7 @@ else: LLFUSE_VERSION_0 = llfuse.__version__.startswith('0') -from .fusedir import sanitize_filename, Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase +from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase from .fusefile import StringFile, FuseArvadosFile _logger = logging.getLogger('arvados.arvados_fuse') diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index 5283367532..7bef8a269f 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -301,7 +301,7 @@ class Mount(object): return e = self.operations.inodes.add_entry(Directory( - llfuse.ROOT_INODE, self.operations.inodes)) + llfuse.ROOT_INODE, self.operations.inodes, self.api.config)) dir_args[0] = e.inode for name in self.args.mount_by_id: diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 3287657441..8b12f73e89 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -33,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse') # appear as underscores in the fuse mount.) _disallowed_filename_characters = re.compile('[\x00/]') -# '.' and '..' are not reachable if API server is newer than #6277 -def sanitize_filename(dirty): - """Replace disallowed filename characters with harmless "_".""" - if dirty is None: - return None - elif dirty == '': - return '_' - elif dirty == '.': - return '_' - elif dirty == '..': - return '__' - else: - return _disallowed_filename_characters.sub('_', dirty) - class Directory(FreshBase): """Generic directory object, backed by a dict. @@ -55,7 +41,7 @@ class Directory(FreshBase): and the value referencing a File or Directory object. """ - def __init__(self, parent_inode, inodes): + def __init__(self, parent_inode, inodes, apiconfig): """parent_inode is the integer inode number""" super(Directory, self).__init__() @@ -65,11 +51,53 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self.inodes = inodes + self.apiconfig = apiconfig self._entries = {} self._mtime = time.time() - # Overriden by subclasses to implement logic to update the entries dict - # when the directory is stale + def forward_slash_subst(self): + if not hasattr(self, '_fsns'): + self._fsns = None + config = self.apiconfig() + try: + self._fsns = config["Collections"]["ForwardSlashNameSubstitution"] + except KeyError: + # old API server with no FSNS config + self._fsns = '_' + else: + if self._fsns == '' or self._fsns == '/': + self._fsns = None + return self._fsns + + def unsanitize_filename(self, incoming): + """Replace ForwardSlashNameSubstitution value with /""" + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + return incoming.replace(fsns, '/') + else: + return incoming + + def sanitize_filename(self, dirty): + """Replace disallowed filename characters according to + ForwardSlashNameSubstitution in self.api_config.""" + # '.' and '..' are not reachable if API server is newer than #6277 + if dirty is None: + return None + elif dirty == '': + return '_' + elif dirty == '.': + return '_' + elif dirty == '..': + return '__' + else: + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + dirty = dirty.replace('/', fsns) + return _disallowed_filename_characters.sub('_', dirty) + + + # Overridden by subclasses to implement logic to update the + # entries dict when the directory is stale @use_counter def update(self): pass @@ -138,7 +166,7 @@ class Directory(FreshBase): self._entries = {} changed = False for i in items: - name = sanitize_filename(fn(i)) + name = self.sanitize_filename(fn(i)) if name: if name in oldentries and same(oldentries[name], i): # move existing directory entry over @@ -246,12 +274,13 @@ class CollectionDirectoryBase(Directory): """ - def __init__(self, parent_inode, inodes, collection): - super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) + def __init__(self, parent_inode, inodes, apiconfig, collection): + super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig) + self.apiconfig = apiconfig self.collection = collection def new_entry(self, name, item, mtime): - name = sanitize_filename(name) + name = self.sanitize_filename(name) if hasattr(item, "fuse_entry") and item.fuse_entry is not None: if item.fuse_entry.dead is not True: raise Exception("Can only reparent dead inode entry") @@ -260,7 +289,7 @@ class CollectionDirectoryBase(Directory): item.fuse_entry.dead = False self._entries[name] = item.fuse_entry elif isinstance(item, arvados.collection.RichCollectionBase): - self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item)) + self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item)) self._entries[name].populate(mtime) else: self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime)) @@ -268,7 +297,7 @@ class CollectionDirectoryBase(Directory): def on_event(self, event, collection, name, item): if collection == self.collection: - name = sanitize_filename(name) + name = self.sanitize_filename(name) _logger.debug("collection notify %s %s %s %s", event, collection, name, item) with llfuse.lock: if event == arvados.collection.ADD: @@ -357,7 +386,7 @@ class CollectionDirectory(CollectionDirectoryBase): """Represents the root of a directory tree representing a collection.""" def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): - super(CollectionDirectory, self).__init__(parent_inode, inodes, None) + super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None) self.api = api self.num_retries = num_retries self.collection_record_file = None @@ -548,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase): keep_client=api_client.keep, num_retries=num_retries) super(TmpCollectionDirectory, self).__init__( - parent_inode, inodes, collection) + parent_inode, inodes, api_client.config, collection) self.collection_record_file = None self.populate(self.mtime()) @@ -625,7 +654,7 @@ and the directory will appear if it exists. """.lstrip() def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False): - super(MagicDirectory, self).__init__(parent_inode, inodes) + super(MagicDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.pdh_only = pdh_only @@ -660,6 +689,7 @@ and the directory will appear if it exists. e = self.inodes.add_entry(ProjectDirectory( self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0])) else: + import sys e = self.inodes.add_entry(CollectionDirectory( self.inode, self.inodes, self.api, self.num_retries, k)) @@ -696,7 +726,7 @@ class TagsDirectory(Directory): """A special directory that contains as subdirectories all tags visible to the user.""" def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60): - super(TagsDirectory, self).__init__(parent_inode, inodes) + super(TagsDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self._poll = True @@ -753,7 +783,7 @@ class TagDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, tag, poll=False, poll_time=60): - super(TagDirectory, self).__init__(parent_inode, inodes) + super(TagDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.tag = tag @@ -783,7 +813,7 @@ class ProjectDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, project_object, poll=False, poll_time=60): - super(ProjectDirectory, self).__init__(parent_inode, inodes) + super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.project_object = project_object @@ -897,16 +927,25 @@ class ProjectDirectory(Directory): elif self._full_listing or super(ProjectDirectory, self).__contains__(k): return super(ProjectDirectory, self).__getitem__(k) with llfuse.lock_released: + k2 = self.unsanitize_filename(k) + if k2 == k: + namefilter = ["name", "=", k] + else: + namefilter = ["name", "in", [k, k2]] contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid], ["group_class", "=", "project"], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if not contents: contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if contents: - name = sanitize_filename(self.namefn(contents[0])) + if len(contents) > 1 and contents[1]['name'] == k: + # If "foo/bar" and "foo[SUBST]bar" both exist, use + # "foo[SUBST]bar". + contents = [contents[1]] + name = self.sanitize_filename(self.namefn(contents[0])) if name != k: raise KeyError(k) return self._add_entry(contents[0], name) @@ -995,8 +1034,8 @@ class ProjectDirectory(Directory): new_attrs = properties.get("new_attributes") or {} old_attrs["uuid"] = ev["object_uuid"] new_attrs["uuid"] = ev["object_uuid"] - old_name = sanitize_filename(self.namefn(old_attrs)) - new_name = sanitize_filename(self.namefn(new_attrs)) + old_name = self.sanitize_filename(self.namefn(old_attrs)) + new_name = self.sanitize_filename(self.namefn(new_attrs)) # create events will have a new name, but not an old name # delete events will have an old name, but not a new name @@ -1038,7 +1077,7 @@ class SharedDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, exclude, poll=False, poll_time=60): - super(SharedDirectory, self).__init__(parent_inode, inodes) + super(SharedDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.current_user = api.users().current().execute(num_retries=num_retries) diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py index f539b3f7d0..593d945cff 100644 --- a/services/fuse/tests/test_mount.py +++ b/services/fuse/tests/test_mount.py @@ -20,6 +20,7 @@ import arvados import arvados_fuse as fuse from . import run_test_server +from .integration_test import IntegrationTest from .mount_test_base import MountTestBase logger = logging.getLogger('arvados.arv-mount') @@ -1098,8 +1099,9 @@ class MagicDirApiError(FuseMagicTest): llfuse.listdir(os.path.join(self.mounttmp, self.testcollection)) -class FuseUnitTest(unittest.TestCase): +class SanitizeFilenameTest(MountTestBase): def test_sanitize_filename(self): + pdir = fuse.ProjectDirectory(1, {}, self.api, 0, project_object=self.api.users().current().execute()) acceptable = [ "foo.txt", ".foo", @@ -1119,15 +1121,15 @@ class FuseUnitTest(unittest.TestCase): "//", ] for f in acceptable: - self.assertEqual(f, fuse.sanitize_filename(f)) + self.assertEqual(f, pdir.sanitize_filename(f)) for f in unacceptable: - self.assertNotEqual(f, fuse.sanitize_filename(f)) + self.assertNotEqual(f, pdir.sanitize_filename(f)) # The sanitized filename should be the same length, though. - self.assertEqual(len(f), len(fuse.sanitize_filename(f))) + self.assertEqual(len(f), len(pdir.sanitize_filename(f))) # Special cases - self.assertEqual("_", fuse.sanitize_filename("")) - self.assertEqual("_", fuse.sanitize_filename(".")) - self.assertEqual("__", fuse.sanitize_filename("..")) + self.assertEqual("_", pdir.sanitize_filename("")) + self.assertEqual("_", pdir.sanitize_filename(".")) + self.assertEqual("__", pdir.sanitize_filename("..")) class FuseMagicTestPDHOnly(MountTestBase): @@ -1191,3 +1193,63 @@ class FuseMagicTestPDHOnly(MountTestBase): def test_with_default_by_id(self): self.verify_pdh_only(skip_pdh_only=True) + + +class SlashSubstitutionTest(IntegrationTest): + mnt_args = [ + '--read-write', + '--mount-home', 'zzz', + ] + + def setUp(self): + super(SlashSubstitutionTest, self).setUp() + self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings()) + self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute() + self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute() + self.fusename = 'foo[SLASH]bar[SLASH]baz' + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_before_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_before_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_after_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_after_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + os.listdir(tmpdir) + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + def checkContents(self): + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcolleasy['uuid']).execute()['manifest_text'], ' f561aaf6') # md5(xxx) + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_conflict(self, get_config_once): + self.testcollconflict = self.api.collections().create(body={"name": self.fusename}).execute() + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcollconflict['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + # foo/bar/baz collection unchanged, because it is masked by foo[SLASH]bar[SLASH]baz + self.assertEqual(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], '') + @staticmethod + def _test_slash_substitution_conflict(self, tmpdir, fusename): + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py index a2b9a0ca92..75e8f85fbd 100644 --- a/services/nodemanager/setup.py +++ b/services/nodemanager/setup.py @@ -56,5 +56,5 @@ setup(name='arvados-node-manager', 'apache-libcloud==2.5.0', 'subprocess32>=3.5.1', ], - zip_safe=False - ) + zip_safe=False, +) diff --git a/tools/crunchstat-summary/setup.py b/tools/crunchstat-summary/setup.py index 40c5a2f9a3..557b6d3f4e 100755 --- a/tools/crunchstat-summary/setup.py +++ b/tools/crunchstat-summary/setup.py @@ -42,5 +42,5 @@ setup(name='crunchstat_summary', ], test_suite='tests', tests_require=['pbr<1.7.0', 'mock>=1.0'], - zip_safe=False - ) + zip_safe=False, +)