14345: Unescape all \ooo in names in manifests, not just \040.
author Tom Clegg <tclegg@veritasgenetics.com>
Mon, 5 Nov 2018 21:46:50 +0000 (16:46 -0500)
committer Tom Clegg <tclegg@veritasgenetics.com>
Thu, 8 Nov 2018 16:21:33 +0000 (11:21 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

sdk/python/arvados/collection.py

index 55797bdfebd53a49db6acd5bf6ea6f1443a317f3..f4502b7e668113a6eed7809f5e6552c2a298f055 100644 (file)
@@ -1667,6 +1667,9 @@ class Collection(RichCollectionBase):
     _block_re = re.compile(r'[0-9a-f]{32}\+(\d+)(\+\S+)*')
     _segment_re = re.compile(r'(\d+):(\d+):(\S+)')
 
+    def _unescape_manifest_path(self, path):
+        return re.sub('\\\\([0-3][0-7][0-7])', lambda m: chr(int(m.group(1), 8)), path)
+
     @synchronized
     def _import_manifest(self, manifest_text):
         """Import a manifest into a `Collection`.
@@ -1691,7 +1694,7 @@ class Collection(RichCollectionBase):
 
             if state == STREAM_NAME:
                 # starting a new stream
-                stream_name = tok.replace('\\040', ' ')
+                stream_name = self._unescape_manifest_path(tok)
                 blocks = []
                 segments = []
                 streamoffset = 0
@@ -1713,7 +1716,7 @@ class Collection(RichCollectionBase):
                 if file_segment:
                     pos = int(file_segment.group(1))
                     size = int(file_segment.group(2))
-                    name = file_segment.group(3).replace('\\040', ' ')
+                    name = self._unescape_manifest_path(file_segment.group(3))
                     filepath = os.path.join(stream_name, name)
                     afile = self.find_or_create(filepath, FILE)
                     if isinstance(afile, ArvadosFile):