# appear as underscores in the fuse mount.)
_disallowed_filename_characters = re.compile('[\x00/]')
-# '.' and '..' are not reachable if API server is newer than #6277
-def sanitize_filename(dirty):
- """Replace disallowed filename characters with harmless "_"."""
- if dirty is None:
- return None
- elif dirty == '':
- return '_'
- elif dirty == '.':
- return '_'
- elif dirty == '..':
- return '__'
- else:
- return _disallowed_filename_characters.sub('_', dirty)
-
class Directory(FreshBase):
"""Generic directory object, backed by a dict.
and the value referencing a File or Directory object.
"""
- def __init__(self, parent_inode, inodes):
+ def __init__(self, parent_inode, inodes, apiconfig):
"""parent_inode is the integer inode number"""
super(Directory, self).__init__()
raise Exception("parent_inode should be an int")
self.parent_inode = parent_inode
self.inodes = inodes
+ self.apiconfig = apiconfig
self._entries = {}
self._mtime = time.time()
- # Overriden by subclasses to implement logic to update the entries dict
- # when the directory is stale
+    def forward_slash_subst(self):
+        """Return the string that stands in for '/' in entry names.
+
+        The value is read from
+        config["Collections"]["ForwardSlashNameSubstitution"], where
+        config is the result of calling self.apiconfig(), and is cached
+        on self._fsns after the first successful lookup.
+
+        Returns '_' when the configuration has no such key (old API
+        server), None when the configured value is '' or '/', and the
+        configured value otherwise.
+
+        Any exception other than KeyError raised while fetching or
+        reading the configuration propagates to the caller and nothing
+        is cached, so the next call retries the lookup.
+        """
+        if not hasattr(self, '_fsns'):
+            # Resolve into a local first: assigning self._fsns before
+            # the lookup would leave a bogus cached None behind if
+            # apiconfig() raised.
+            config = self.apiconfig()
+            try:
+                fsns = config["Collections"]["ForwardSlashNameSubstitution"]
+            except KeyError:
+                # old API server with no FSNS config
+                fsns = '_'
+            else:
+                if fsns == '' or fsns == '/':
+                    fsns = None
+            self._fsns = fsns
+        return self._fsns
+
+    def unsanitize_filename(self, incoming):
+        """Map a mount-side name back to a candidate API-side name.
+
+        Every occurrence of the ForwardSlashNameSubstitution string
+        (as returned by forward_slash_subst()) in `incoming` is
+        replaced with '/'.  When forward_slash_subst() does not return
+        a str, `incoming` is returned unchanged.
+        """
+        subst = self.forward_slash_subst()
+        if not isinstance(subst, str):
+            return incoming
+        return incoming.replace(subst, '/')
+
+    def sanitize_filename(self, dirty):
+        """Turn an API-side name into one usable as a directory entry.
+
+        None is passed through unchanged.  The empty string and '.'
+        become '_', and '..' becomes '__'.  For any other name, '/' is
+        first replaced with the ForwardSlashNameSubstitution string
+        (when forward_slash_subst(), which reads self.apiconfig,
+        returns a str); then every character still matched by
+        _disallowed_filename_characters is replaced with '_'.
+        """
+        if dirty is None:
+            return None
+        # '.' and '..' are not reachable if API server is newer than #6277
+        if dirty == '' or dirty == '.':
+            return '_'
+        if dirty == '..':
+            return '__'
+        subst = self.forward_slash_subst()
+        cleaned = dirty.replace('/', subst) if isinstance(subst, str) else dirty
+        return _disallowed_filename_characters.sub('_', cleaned)
+
+
+ # Overridden by subclasses to implement logic to update the
+ # entries dict when the directory is stale
@use_counter
def update(self):
pass
self._entries = {}
changed = False
for i in items:
- name = sanitize_filename(fn(i))
+ name = self.sanitize_filename(fn(i))
if name:
if name in oldentries and same(oldentries[name], i):
# move existing directory entry over
"""
- def __init__(self, parent_inode, inodes, collection):
- super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+ def __init__(self, parent_inode, inodes, apiconfig, collection):
+ # apiconfig is only forwarded to the base-class constructor:
+ # Directory.__init__ already stores it as self.apiconfig, so no
+ # second assignment is needed here.  collection is kept on
+ # self.collection.
+ super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig)
self.collection = collection
def new_entry(self, name, item, mtime):
- name = sanitize_filename(name)
+ name = self.sanitize_filename(name)
if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
if item.fuse_entry.dead is not True:
raise Exception("Can only reparent dead inode entry")
item.fuse_entry.dead = False
self._entries[name] = item.fuse_entry
elif isinstance(item, arvados.collection.RichCollectionBase):
- self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+ self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item))
self._entries[name].populate(mtime)
else:
self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
def on_event(self, event, collection, name, item):
if collection == self.collection:
- name = sanitize_filename(name)
+ name = self.sanitize_filename(name)
_logger.debug("collection notify %s %s %s %s", event, collection, name, item)
with llfuse.lock:
if event == arvados.collection.ADD:
"""Represents the root of a directory tree representing a collection."""
def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
- super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
+ super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None)
self.api = api
self.num_retries = num_retries
self.collection_record_file = None
keep_client=api_client.keep,
num_retries=num_retries)
super(TmpCollectionDirectory, self).__init__(
- parent_inode, inodes, collection)
+ parent_inode, inodes, api_client.config, collection)
self.collection_record_file = None
self.populate(self.mtime())
""".lstrip()
def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
- super(MagicDirectory, self).__init__(parent_inode, inodes)
+ super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
self.api = api
self.num_retries = num_retries
self.pdh_only = pdh_only
e = self.inodes.add_entry(ProjectDirectory(
self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
else:
+ import sys
e = self.inodes.add_entry(CollectionDirectory(
self.inode, self.inodes, self.api, self.num_retries, k))
"""A special directory that contains as subdirectories all tags visible to the user."""
def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
- super(TagsDirectory, self).__init__(parent_inode, inodes)
+ super(TagsDirectory, self).__init__(parent_inode, inodes, api.config)
self.api = api
self.num_retries = num_retries
self._poll = True
def __init__(self, parent_inode, inodes, api, num_retries, tag,
poll=False, poll_time=60):
- super(TagDirectory, self).__init__(parent_inode, inodes)
+ super(TagDirectory, self).__init__(parent_inode, inodes, api.config)
self.api = api
self.num_retries = num_retries
self.tag = tag
def __init__(self, parent_inode, inodes, api, num_retries, project_object,
poll=False, poll_time=60):
- super(ProjectDirectory, self).__init__(parent_inode, inodes)
+ super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
self.api = api
self.num_retries = num_retries
self.project_object = project_object
elif self._full_listing or super(ProjectDirectory, self).__contains__(k):
return super(ProjectDirectory, self).__getitem__(k)
with llfuse.lock_released:
+ k2 = self.unsanitize_filename(k)
+ if k2 == k:
+ namefilter = ["name", "=", k]
+ else:
+ namefilter = ["name", "in", [k, k2]]
contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid],
["group_class", "=", "project"],
- ["name", "=", k]],
- limit=1).execute(num_retries=self.num_retries)["items"]
+ namefilter],
+ limit=2).execute(num_retries=self.num_retries)["items"]
if not contents:
contents = self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid],
- ["name", "=", k]],
- limit=1).execute(num_retries=self.num_retries)["items"]
+ namefilter],
+ limit=2).execute(num_retries=self.num_retries)["items"]
if contents:
- name = sanitize_filename(self.namefn(contents[0]))
+ if len(contents) > 1 and contents[1]['name'] == k:
+ # If "foo/bar" and "foo[SUBST]bar" both exist, use
+ # "foo[SUBST]bar".
+ contents = [contents[1]]
+ name = self.sanitize_filename(self.namefn(contents[0]))
if name != k:
raise KeyError(k)
return self._add_entry(contents[0], name)
new_attrs = properties.get("new_attributes") or {}
old_attrs["uuid"] = ev["object_uuid"]
new_attrs["uuid"] = ev["object_uuid"]
- old_name = sanitize_filename(self.namefn(old_attrs))
- new_name = sanitize_filename(self.namefn(new_attrs))
+ old_name = self.sanitize_filename(self.namefn(old_attrs))
+ new_name = self.sanitize_filename(self.namefn(new_attrs))
# create events will have a new name, but not an old name
# delete events will have an old name, but not a new name
def __init__(self, parent_inode, inodes, api, num_retries, exclude,
poll=False, poll_time=60):
- super(SharedDirectory, self).__init__(parent_inode, inodes)
+ super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
self.api = api
self.num_retries = num_retries
self.current_user = api.users().current().execute(num_retries=num_retries)
import arvados_fuse as fuse
from . import run_test_server
+from .integration_test import IntegrationTest
from .mount_test_base import MountTestBase
logger = logging.getLogger('arvados.arv-mount')
llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
-class FuseUnitTest(unittest.TestCase):
+class SanitizeFilenameTest(MountTestBase):
def test_sanitize_filename(self):
+ pdir = fuse.ProjectDirectory(1, {}, self.api, 0, project_object=self.api.users().current().execute())
acceptable = [
"foo.txt",
".foo",
"//",
]
for f in acceptable:
- self.assertEqual(f, fuse.sanitize_filename(f))
+ self.assertEqual(f, pdir.sanitize_filename(f))
for f in unacceptable:
- self.assertNotEqual(f, fuse.sanitize_filename(f))
+ self.assertNotEqual(f, pdir.sanitize_filename(f))
# The sanitized filename should be the same length, though.
- self.assertEqual(len(f), len(fuse.sanitize_filename(f)))
+ self.assertEqual(len(f), len(pdir.sanitize_filename(f)))
# Special cases
- self.assertEqual("_", fuse.sanitize_filename(""))
- self.assertEqual("_", fuse.sanitize_filename("."))
- self.assertEqual("__", fuse.sanitize_filename(".."))
+ self.assertEqual("_", pdir.sanitize_filename(""))
+ self.assertEqual("_", pdir.sanitize_filename("."))
+ self.assertEqual("__", pdir.sanitize_filename(".."))
class FuseMagicTestPDHOnly(MountTestBase):
def test_with_default_by_id(self):
self.verify_pdh_only(skip_pdh_only=True)
+
+
+class SlashSubstitutionTest(IntegrationTest):
+ """Integration tests for ForwardSlashNameSubstitution in a mount.
+
+ Each test mounts with mnt_args (read-write, home project mounted
+ as 'zzz'), patches arvados.util.get_config_once so the substitution
+ string is "[SLASH]", writes files through the mount, and then
+ checks the resulting collection manifests through the API.
+ """
+ mnt_args = [
+ '--read-write',
+ '--mount-home', 'zzz',
+ ]
+
+ def setUp(self):
+ super(SlashSubstitutionTest, self).setUp()
+ self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+ # Override the client's config accessor with a fixed FSNS value.
+ self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ # One collection whose name contains slashes, and one whose name
+ # needs no substitution.
+ self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute()
+ self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute()
+ # Name of "foo/bar/baz" with each '/' replaced by "[SLASH]".
+ self.fusename = 'foo[SLASH]bar[SLASH]baz'
+
+ # Writes to both collections without listing the parent directory
+ # first, then verifies the manifests.
+ @IntegrationTest.mount(argv=mnt_args)
+ @mock.patch('arvados.util.get_config_once')
+ def test_slash_substitution_before_listing(self, get_config_once):
+ get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+ self.checkContents()
+ # NOTE(review): presumably invoked by pool_test with the arguments
+ # given above (helper name mirrors the test name) -- confirm
+ # against IntegrationTest.
+ @staticmethod
+ def _test_slash_substitution_before_listing(self, tmpdir, fusename):
+ with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f:
+ f.write('xxx')
+ with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+ f.write('foo')
+
+ # Same as the "before listing" test, except the helper lists the
+ # parent directory before opening the substituted name.
+ @IntegrationTest.mount(argv=mnt_args)
+ @mock.patch('arvados.util.get_config_once')
+ def test_slash_substitution_after_listing(self, get_config_once):
+ get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+ self.checkContents()
+ @staticmethod
+ def _test_slash_substitution_after_listing(self, tmpdir, fusename):
+ with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f:
+ f.write('xxx')
+ os.listdir(tmpdir)
+ with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+ f.write('foo')
+
+ # Re-fetches both collections created in setUp and checks that each
+ # manifest_text contains the expected hash prefix.
+ def checkContents(self):
+ self.assertRegexpMatches(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo)
+ self.assertRegexpMatches(self.api.collections().get(uuid=self.testcolleasy['uuid']).execute()['manifest_text'], ' f561aaf6') # md5(xxx)
+
+ # Creates a third collection literally named like self.fusename and
+ # checks that the write lands there while "foo/bar/baz" stays empty.
+ @IntegrationTest.mount(argv=mnt_args)
+ @mock.patch('arvados.util.get_config_once')
+ def test_slash_substitution_conflict(self, get_config_once):
+ self.testcollconflict = self.api.collections().create(body={"name": self.fusename}).execute()
+ get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
+ self.assertRegexpMatches(self.api.collections().get(uuid=self.testcollconflict['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo)
+ # foo/bar/baz collection unchanged, because it is masked by foo[SLASH]bar[SLASH]baz
+ self.assertEqual(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], '')
+ @staticmethod
+ def _test_slash_substitution_conflict(self, tmpdir, fusename):
+ with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
+ f.write('foo')