X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/607fe087f6167061714a524dd53cbbc21b974973..55b5ec75a3f6c9154778c8836a99db37e3250abf:/sdk/python/arvados/_normalize_stream.py

diff --git a/sdk/python/arvados/_normalize_stream.py b/sdk/python/arvados/_normalize_stream.py
index babcabc1a7..485c757e7f 100644
--- a/sdk/python/arvados/_normalize_stream.py
+++ b/sdk/python/arvados/_normalize_stream.py
@@ -1,6 +1,17 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import
 from . import config
 
+import re
+
+def escape(path):
+    path = re.sub('\\\\', lambda m: '\\134', path)
+    path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
+    return path
+
 def normalize_stream(stream_name, stream):
     """Take manifest stream and return a list of tokens in normalized format.
 
@@ -12,7 +23,7 @@ def normalize_stream(stream_name, stream):
 
     """
 
-    stream_name = stream_name.replace(' ', '\\040')
+    stream_name = escape(stream_name)
     stream_tokens = [stream_name]
     sortedfiles = list(stream.keys())
     sortedfiles.sort()
@@ -34,7 +45,7 @@ def normalize_stream(stream_name, stream):
     for streamfile in sortedfiles:
         # Add in file segments
         current_span = None
-        fout = streamfile.replace(' ', '\\040')
+        fout = escape(streamfile)
         for segment in stream[streamfile]:
             # Collapse adjacent segments
             streamoffset = blocks[segment.locator] + segment.segment_offset
@@ -44,13 +55,13 @@ def normalize_stream(stream_name, stream):
                 if streamoffset == current_span[1]:
                     current_span[1] += segment.segment_size
                 else:
-                    stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+                    stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                     current_span = [streamoffset, streamoffset + segment.segment_size]
 
         if current_span is not None:
-            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
 
         if not stream[streamfile]:
-            stream_tokens.append("0:0:{0}".format(fout))
+            stream_tokens.append(u"0:0:{0}".format(fout))
 
     return stream_tokens