4363: Fix filename munging. Add tests.
authorTom Clegg <tom@curoverse.com>
Wed, 12 Nov 2014 21:55:06 +0000 (16:55 -0500)
committerTom Clegg <tom@curoverse.com>
Wed, 12 Nov 2014 21:55:06 +0000 (16:55 -0500)
services/fuse/arvados_fuse/__init__.py
services/fuse/tests/test_mount.py

index d0f2643ff11c043ba23266c84cd57660dfb0cbf3..9154c827ca64c4a7f8389168e004e3d8078caf7f 100644 (file)
@@ -24,6 +24,11 @@ from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uu
 
 _logger = logging.getLogger('arvados.arvados_fuse')
 
+# Match any character which FUSE or Linux cannot accommodate as part
+# of a filename. (If present in a collection filename, they will
+# appear as underscores in the fuse mount.)
+_disallowed_filename_characters = re.compile('[\x00/]')
+
 class SafeApi(object):
     '''Threadsafe wrapper for API object.  This stores and returns a different api
     object per thread, because httplib2 which underlies apiclient is not
@@ -64,24 +69,17 @@ def convertTime(t):
         return 0
 
 def sanitize_filename(dirty):
-    '''Remove troublesome characters from filenames.'''
-    # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
+    '''Replace disallowed filename characters with harmless "_".'''
     if dirty is None:
         return None
-
-    fn = ""
-    for c in dirty:
-        if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/':
-            # skip control characters and /
-            continue
-        fn += c
-
-    # strip leading - or ~ and leading/trailing whitespace
-    stripped = fn.lstrip("-~ ").rstrip()
-    if len(stripped) > 0:
-        return stripped
+    elif dirty == '':
+        return '_'
+    elif dirty == '.':
+        return '_'
+    elif dirty == '..':
+        return '__'
     else:
-        return None
+        return _disallowed_filename_characters.sub('_', dirty)
 
 
 class FreshBase(object):
index bb14d43c0cc2de2a173aba4d4df26accef40b827..f9d06de3accbdf34a22fcf0230979f4457764ddc 100644 (file)
@@ -311,3 +311,36 @@ class FuseHomeTest(MountTestBase):
         d3 = os.listdir(os.path.join(self.mounttmp, 'Unrestricted public data', 'GNU General Public License, version 3'))
         d3.sort()
         self.assertEqual(["GNU_General_Public_License,_version_3.pdf"], d3)
+
+
+class FuseUnitTest(unittest.TestCase):
+    def test_sanitize_filename(self):
+        acceptable = [
+            "foo.txt",
+            ".foo",
+            "..foo",
+            "...",
+            "foo...",
+            "foo..",
+            "foo.",
+            "-",
+            "\x01\x02\x03",
+            ]
+        unacceptable = [
+            "f\00",
+            "\00\00",
+            "/foo",
+            "foo/",
+            "//",
+            ]
+        for f in acceptable:
+            self.assertEqual(f, fuse.sanitize_filename(f))
+        for f in unacceptable:
+            self.assertNotEqual(f, fuse.sanitize_filename(f))
+            # The sanitized filename should be the same length, though.
+            self.assertEqual(len(f), len(fuse.sanitize_filename(f)))
+        # Special cases
+        self.assertEqual("_", fuse.sanitize_filename(""))
+        self.assertEqual("_", fuse.sanitize_filename("."))
+        self.assertEqual("__", fuse.sanitize_filename(".."))
+        self.assertEqual("__", fuse.sanitize_filename(".."))