X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ee434dea5db0b02fe762ff80b2270cf268e84378..b352c3862814fe0bdd2b5a40b1dc8171474dbb48:/sdk/python/arvados/_normalize_stream.py diff --git a/sdk/python/arvados/_normalize_stream.py b/sdk/python/arvados/_normalize_stream.py index babcabc1a7..c72b82be1c 100644 --- a/sdk/python/arvados/_normalize_stream.py +++ b/sdk/python/arvados/_normalize_stream.py @@ -1,6 +1,15 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + from __future__ import absolute_import from . import config +import re + +def escape(path): + return re.sub(r'[\\:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path) + def normalize_stream(stream_name, stream): """Take manifest stream and return a list of tokens in normalized format. @@ -12,7 +21,7 @@ def normalize_stream(stream_name, stream): """ - stream_name = stream_name.replace(' ', '\\040') + stream_name = escape(stream_name) stream_tokens = [stream_name] sortedfiles = list(stream.keys()) sortedfiles.sort() @@ -34,7 +43,7 @@ def normalize_stream(stream_name, stream): for streamfile in sortedfiles: # Add in file segments current_span = None - fout = streamfile.replace(' ', '\\040') + fout = escape(streamfile) for segment in stream[streamfile]: # Collapse adjacent segments streamoffset = blocks[segment.locator] + segment.segment_offset @@ -44,13 +53,13 @@ def normalize_stream(stream_name, stream): if streamoffset == current_span[1]: current_span[1] += segment.segment_size else: - stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout)) + stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout)) current_span = [streamoffset, streamoffset + segment.segment_size] if current_span is not None: - stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout)) + stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout)) if not stream[streamfile]: - stream_tokens.append("0:0:{0}".format(fout)) + stream_tokens.append(u"0:0:{0}".format(fout)) return stream_tokens