14539: Encode \t & \n chars in file/dir names in the collection's manifest.
author Lucas Di Pentima <ldipentima@veritasgenetics.com>
Tue, 15 Jan 2019 17:25:39 +0000 (14:25 -0300)
committer Lucas Di Pentima <ldipentima@veritasgenetics.com>
Tue, 15 Jan 2019 17:25:39 +0000 (14:25 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>

sdk/python/arvados/_normalize_stream.py
sdk/python/tests/test_collections.py

index 9caef764edd5af6b6f1c39149ee5f7bf64433fd7..f1f6052cddb9aebff6e4c47652795b4a63f1995c 100644 (file)
@@ -8,7 +8,15 @@ from . import config
 import re
 
 def escape(path):
-    return re.sub('\\\\([0-3][0-7][0-7])', lambda m: '\\134'+m.group(1), path).replace(' ', '\\040')
+    replacements = [
+        ('\t', '\\011'),
+        ('\n', '\\012'),
+        (' ', '\\040'),
+    ]
+    path = re.sub('\\\\([0-3][0-7][0-7])', lambda m: '\\134'+m.group(1), path)
+    for a, b in replacements:
+        path = path.replace(a, b)
+    return path
 
 def normalize_stream(stream_name, stream):
     """Take manifest stream and return a list of tokens in normalized format.
index 3a4dabfeae7877d9b8a4a2def8b713886f1149f6..f3ad16ecfe4aaeced2f969c6cc6453a33c8daaa2 100644 (file)
@@ -958,6 +958,17 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         c = Collection(manifest)
         self.assertEqual(c.portable_manifest_text(), manifest)
 
+    def test_other_special_chars_on_file_token(self):
+        cases = [
+            ('\\011', '\t'),
+            ('\\012', '\n'),
+        ]
+        for encoded, decoded in cases:
+            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
+            c = Collection(manifest)
+            self.assertEqual(c.portable_manifest_text(), manifest)
+            self.assertIn('some%sfile.txt' % decoded, c.keys())
+
     def test_escaped_paths_do_get_unescaped_on_listing(self):
         # Dir & file names are literally '\056' (escaped form: \134056)
         manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'