-import config
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from . import config
+
+import re
+
+def escape(path):
+ """Return path with backslashes, colons, and characters 0-32 octal-escaped.
+
+ Each affected character is replaced by a backslash followed by the
+ character's code written as three octal digits. normalize_stream applies
+ this to stream names and file names before emitting them as tokens.
+ """
+ # Escape pre-existing backslashes first (octal 134), so the backslashes
+ # introduced by the substitution below are not rewritten a second time.
+ path = re.sub('\\\\', lambda m: '\\134', path)
+ # Replace ':' and every character from code 0 through octal 040 (space)
+ # with a backslash plus its zero-padded three-digit octal code.
+ path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
+ return path
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
"""
- stream_name = stream_name.replace(' ', '\\040')
+ stream_name = escape(stream_name)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
blocks = {}
- streamoffset = 0L
+ streamoffset = 0
# Go through each file and add each referenced block exactly once.
for streamfile in sortedfiles:
for segment in stream[streamfile]:
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout = streamfile.replace(' ', '\\040')
+ fout = escape(streamfile)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset