+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from . import config
+
+import re
+
+def escape(path):
+ path = re.sub('\\\\', lambda m: '\\134', path)
+ path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
+ return path
+
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
"""
+ stream_name = escape(stream_name)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
blocks = {}
- streamoffset = 0L
+ streamoffset = 0
# Go through each file and add each referenced block exactly once.
for streamfile in sortedfiles:
for segment in stream[streamfile]:
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout = streamfile.replace(' ', '\\040')
+ fout = escape(streamfile)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
if streamoffset == current_span[1]:
current_span[1] += segment.segment_size
else:
- stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+ stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
current_span = [streamoffset, streamoffset + segment.segment_size]
if current_span is not None:
- stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+ stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
if not stream[streamfile]:
- stream_tokens.append("0:0:{0}".format(fout))
+ stream_tokens.append(u"0:0:{0}".format(fout))
return stream_tokens