Merge branch 'master' into 14539-pysdk-empty-dir
authorLucas Di Pentima <ldipentima@veritasgenetics.com>
Mon, 14 Jan 2019 17:16:32 +0000 (14:16 -0300)
committerLucas Di Pentima <ldipentima@veritasgenetics.com>
Mon, 14 Jan 2019 17:16:32 +0000 (14:16 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>

build/run-tests.sh
sdk/python/arvados/_normalize_stream.py
sdk/python/arvados/collection.py
sdk/python/tests/test_collections.py
services/fuse/tests/test_mount.py

index cb44372566f8eb36ec6163f9108c97e464d6fca8..c81376404cfae23224cca9d828527ebd3a2e3a61 100755 (executable)
@@ -37,7 +37,7 @@ CONFIGSRC=path Dir with api server config files to copy into source tree.
                (If none given, leave config files alone in source tree.)
 services/api_test="TEST=test/functional/arvados/v1/collections_controller_test.rb"
                Restrict apiserver tests to the given file
-sdk/python_test="--test-suite test.test_keep_locator"
+sdk/python_test="--test-suite tests.test_keep_locator"
                Restrict Python SDK tests to the given class
 apps/workbench_test="TEST=test/integration/pipeline_instances_test.rb"
                Restrict Workbench tests to the given file
index 47b66c82da000d840bdb7221575019cf6396981e..9caef764edd5af6b6f1c39149ee5f7bf64433fd7 100644 (file)
@@ -5,6 +5,11 @@
 from __future__ import absolute_import
 from . import config
 
+import re
+
+def escape(path):
+    return re.sub('\\\\([0-3][0-7][0-7])', lambda m: '\\134'+m.group(1), path).replace(' ', '\\040')
+
 def normalize_stream(stream_name, stream):
     """Take manifest stream and return a list of tokens in normalized format.
 
@@ -16,7 +21,7 @@ def normalize_stream(stream_name, stream):
 
     """
 
-    stream_name = stream_name.replace(' ', '\\040')
+    stream_name = escape(stream_name)
     stream_tokens = [stream_name]
     sortedfiles = list(stream.keys())
     sortedfiles.sort()
@@ -38,7 +43,7 @@ def normalize_stream(stream_name, stream):
     for streamfile in sortedfiles:
         # Add in file segments
         current_span = None
-        fout = streamfile.replace(' ', '\\040')
+        fout = escape(streamfile)
         for segment in stream[streamfile]:
             # Collapse adjacent segments
             streamoffset = blocks[segment.locator] + segment.segment_offset
index 627f0346db2c6760710db3edaf356f4cb724bf91..7ad07cc607206fe32f46fe0c94cf9ea34e115224 100644 (file)
@@ -26,7 +26,7 @@ from stat import *
 from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
 from .keep import KeepLocator, KeepClient
 from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
 from ._ranges import Range, LocatorAndRange
 from .safeapi import ThreadSafeApiCache
 import arvados.config as config
@@ -562,6 +562,7 @@ class RichCollectionBase(CollectionBase):
     def stream_name(self):
         raise NotImplementedError()
 
+
     @synchronized
     def has_remote_blocks(self):
         """Recursively check for a +R segment locator signature."""
@@ -1058,7 +1059,9 @@ class RichCollectionBase(CollectionBase):
             if stream:
                 buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
             for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True, only_committed=only_committed))
+                buf.append(self[dirname].manifest_text(
+                    stream_name=os.path.join(stream_name, dirname),
+                    strip=strip, normalize=True, only_committed=only_committed))
             return "".join(buf)
         else:
             if strip:
@@ -1833,6 +1836,16 @@ class Subcollection(RichCollectionBase):
         self.name = newname
         self.lock = self.parent.root_collection().lock
 
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
+        """Encode empty directories by using an \056-named (".") empty file"""
+        if len(self._items) == 0:
+            return "%s %s 0:0:\\056\n" % (
+                escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
+        return super(Subcollection, self)._get_manifest_text(stream_name,
+                                                             strip, normalize,
+                                                             only_committed)
+
 
 class CollectionReader(Collection):
     """A read-only collection object.
index de01006741e91b12047f70d6b82dfff04f80bfdc..3a4dabfeae7877d9b8a4a2def8b713886f1149f6 100644 (file)
@@ -952,10 +952,35 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertIs(c.find("./nonexistant.txt"), None)
         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
 
+    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertEqual(c.portable_manifest_text(), manifest)
+
+    def test_escaped_paths_do_get_unescaped_on_listing(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertIn('\\056 Test', c.keys())
+        self.assertIn('\\056', c['\\056 Test'].keys())
+
+    def test_make_empty_dir_with_escaped_chars(self):
+        c = Collection()
+        c.mkdirs('./Empty\\056Dir')
+        self.assertEqual(c.portable_manifest_text(),
+                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+    def test_make_empty_dir_with_spaces(self):
+        c = Collection()
+        c.mkdirs('./foo bar/baz waz')
+        self.assertEqual(c.portable_manifest_text(),
+                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
index fb282d1aaa76a91cd58b9a5a15a28e7b263a1c4c..6a00cdb6b42d815a280930ec11650572581881aa 100644 (file)
@@ -158,7 +158,7 @@ class FuseMagicTest(MountTestBase):
         self.assertIn(self.testcollection,
                       llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
         self.assertIn(self.test_project, mount_ls)
-        self.assertIn(self.test_project, 
+        self.assertIn(self.test_project,
                       llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
 
         with self.assertRaises(OSError):
@@ -617,7 +617,8 @@ class FuseRmTest(MountTestBase):
 
         # Can't have empty directories :-( so manifest will be empty.
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
-        self.assertEqual(collection2["manifest_text"], "")
+        self.assertRegexpMatches(collection2["manifest_text"],
+                                 r'./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
 
         self.pool.apply(fuseRmTestHelperRmdir, (self.mounttmp,))
 
@@ -674,7 +675,7 @@ class FuseMvFileTest(MountTestBase):
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
         self.assertRegexpMatches(collection2["manifest_text"],
-            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt\n\./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
 
 
 def fuseRenameTestHelper(mounttmp):