15370: Fix flaky test.
[arvados.git] / sdk / python / arvados / _normalize_stream.py
index e0831b1aacd648ec3820cbe58ee3b0e9b5f02c0c..485c757e7fce34dda579185608f39bfe4911bd94 100644 (file)
@@ -1,4 +1,16 @@
-import config
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from . import config
+
+import re
+
+def escape(path):
+    path = re.sub('\\\\', lambda m: '\\134', path)
+    path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
+    return path
 
 def normalize_stream(stream_name, stream):
     """Take manifest stream and return a list of tokens in normalized format.
@@ -11,13 +23,13 @@ def normalize_stream(stream_name, stream):
 
     """
 
-    stream_name = stream_name.replace(' ', '\\040')
+    stream_name = escape(stream_name)
     stream_tokens = [stream_name]
     sortedfiles = list(stream.keys())
     sortedfiles.sort()
 
     blocks = {}
-    streamoffset = 0L
+    streamoffset = 0
     # Go through each file and add each referenced block exactly once.
     for streamfile in sortedfiles:
         for segment in stream[streamfile]:
@@ -33,7 +45,7 @@ def normalize_stream(stream_name, stream):
     for streamfile in sortedfiles:
         # Add in file segments
         current_span = None
-        fout = streamfile.replace(' ', '\\040')
+        fout = escape(streamfile)
         for segment in stream[streamfile]:
             # Collapse adjacent segments
             streamoffset = blocks[segment.locator] + segment.segment_offset
@@ -43,13 +55,13 @@ def normalize_stream(stream_name, stream):
                 if streamoffset == current_span[1]:
                     current_span[1] += segment.segment_size
                 else:
-                    stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+                    stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                     current_span = [streamoffset, streamoffset + segment.segment_size]
 
         if current_span is not None:
-            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
 
         if not stream[streamfile]:
-            stream_tokens.append("0:0:{0}".format(fout))
+            stream_tokens.append(u"0:0:{0}".format(fout))
 
     return stream_tokens