From 53192c327dcce6159d21f6cc27f7d5c0bfc9e7b0 Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Wed, 12 Nov 2014 16:55:06 -0500
Subject: [PATCH] 4363: Fix filename munging. Add tests.

---
 services/fuse/arvados_fuse/__init__.py | 28 ++++++++++------------
 services/fuse/tests/test_mount.py      | 33 ++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 15 deletions(-)

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Hunk line counts were checked against the
hunk headers, but exact indentation widths and the wrap point of the SafeApi
docstring context are inferred -- confirm against the repository before
applying.

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index d0f2643ff1..9154c827ca 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -24,6 +24,11 @@ from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uu
 
 _logger = logging.getLogger('arvados.arvados_fuse')
 
+# Match any character which FUSE or Linux cannot accommodate as part
+# of a filename. (If present in a collection filename, they will
+# appear as underscores in the fuse mount.)
+_disallowed_filename_characters = re.compile('[\x00/]')
+
 class SafeApi(object):
     '''Threadsafe wrapper for API object. This stores and returns a different api
     object per thread, because httplib2 which underlies apiclient is not
@@ -64,24 +69,17 @@ def convertTime(t):
         return 0
 
 def sanitize_filename(dirty):
-    '''Remove troublesome characters from filenames.'''
-    # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
+    '''Replace disallowed filename characters with harmless "_".'''
     if dirty is None:
         return None
-
-    fn = ""
-    for c in dirty:
-        if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/':
-            # skip control characters and /
-            continue
-        fn += c
-
-    # strip leading - or ~ and leading/trailing whitespace
-    stripped = fn.lstrip("-~ ").rstrip()
-    if len(stripped) > 0:
-        return stripped
+    elif dirty == '':
+        return '_'
+    elif dirty == '.':
+        return '_'
+    elif dirty == '..':
+        return '__'
     else:
-        return None
+        return _disallowed_filename_characters.sub('_', dirty)
 
 
 class FreshBase(object):
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index bb14d43c0c..f9d06de3ac 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -311,3 +311,36 @@ class FuseHomeTest(MountTestBase):
         d3 = os.listdir(os.path.join(self.mounttmp, 'Unrestricted public data', 'GNU General Public License, version 3'))
         d3.sort()
         self.assertEqual(["GNU_General_Public_License,_version_3.pdf"], d3)
+
+
+class FuseUnitTest(unittest.TestCase):
+    def test_sanitize_filename(self):
+        acceptable = [
+            "foo.txt",
+            ".foo",
+            "..foo",
+            "...",
+            "foo...",
+            "foo..",
+            "foo.",
+            "-",
+            "\x01\x02\x03",
+        ]
+        unacceptable = [
+            "f\00",
+            "\00\00",
+            "/foo",
+            "foo/",
+            "//",
+        ]
+        for f in acceptable:
+            self.assertEqual(f, fuse.sanitize_filename(f))
+        for f in unacceptable:
+            self.assertNotEqual(f, fuse.sanitize_filename(f))
+            # The sanitized filename should be the same length, though.
+            self.assertEqual(len(f), len(fuse.sanitize_filename(f)))
+        # Special cases
+        self.assertEqual("_", fuse.sanitize_filename(""))
+        self.assertEqual("_", fuse.sanitize_filename("."))
+        self.assertEqual("__", fuse.sanitize_filename(".."))
+        self.assertEqual("__", fuse.sanitize_filename(".."))
-- 
2.30.2