1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import absolute_import
11 path = re.sub('\\\\', lambda m: '\\134', path)
12 path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
def normalize_stream(stream_name, stream):
    """Take a manifest stream and return a list of tokens in normalized format.

    :stream_name:
      The name of the stream.

    :stream:
      A dict mapping each filename to a list of `_range.LocatorAndRange` objects.

    :returns:
      A list of tokens: the escaped stream name, then every referenced
      block locator exactly once (in order of first reference), then
      `offset:length:filename` tokens for each file, where adjacent
      segments of a file are merged into a single token.

    """
    stream_tokens = [escape(stream_name)]

    # NOTE(review): the list is named `sortedfiles`, so files are ordered
    # here to make the output independent of dict iteration order --
    # confirm this matches the upstream file.
    sortedfiles = sorted(stream.keys())

    # Maps each block locator to the offset at which that block starts
    # within the stream (blocks are laid out in order of first reference).
    blocks = {}
    streamoffset = 0

    # Go through each file and add each referenced block exactly once.
    for streamfile in sortedfiles:
        for segment in stream[streamfile]:
            if segment.locator not in blocks:
                stream_tokens.append(segment.locator)
                blocks[segment.locator] = streamoffset
                streamoffset += segment.block_size

    # Add the empty block if the stream is otherwise empty.
    if len(stream_tokens) == 1:
        stream_tokens.append(config.EMPTY_BLOCK_LOCATOR)

    for streamfile in sortedfiles:
        # Add in file segments.  `current_span` is the [start, end) range,
        # in stream offsets, that has been accumulated but not yet emitted.
        current_span = None
        fout = escape(streamfile)
        for segment in stream[streamfile]:
            streamoffset = blocks[segment.locator] + segment.segment_offset
            if current_span is None:
                current_span = [streamoffset, streamoffset + segment.segment_size]
            elif streamoffset == current_span[1]:
                # Collapse adjacent segments: this one starts exactly
                # where the pending span ends, so just extend the span.
                current_span[1] += segment.segment_size
            else:
                # Not contiguous: emit the pending span, start a new one.
                stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                current_span = [streamoffset, streamoffset + segment.segment_size]

        # Emit whatever span is still pending for this file.
        if current_span is not None:
            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))

        # A file with no segments still gets a zero-length token.
        if not stream[streamfile]:
            stream_tokens.append(u"0:0:{0}".format(fout))

    return stream_tokens