1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import absolute_import
def escape(path):
    """Escape a stream or file name so it can be used as a manifest token.

    Every backslash is replaced by a backslash followed by ``134``.  Then
    every colon and every character in the range NUL through space
    (octal 000-040) is replaced by a backslash followed by the character's
    three-digit octal code.

    :path:
      The string to escape.

    Returns the escaped string.
    """
    # Imported locally so this function does not rely on a file-level import.
    import re

    # Escape literal backslash first, so the backslashes introduced by the
    # octal escapes below are not themselves escaped a second time.  A
    # callable replacement is used so the replacement text is taken verbatim
    # rather than being parsed as a regex replacement template.
    path = re.sub('\\\\', lambda m: '\\134', path)
    # Colon and all characters from NUL up to and including space become
    # backslash + three octal digits.
    path = re.sub('([:\000-\040])', lambda m: "\\%03o" % ord(m.group(1)), path)
    return path
def normalize_stream(stream_name, stream):
    """Take manifest stream and return a list of tokens in normalized format.

    :stream_name:
      The name of the stream.

    :stream:
      A dict mapping each filename to a list of `_range.LocatorAndRange` objects.
      Each object is read for its `locator`, `block_size`, `segment_offset`
      and `segment_size` attributes.

    Returns a list of strings: the escaped stream name, then each referenced
    block locator exactly once (in order of first use, visiting files in
    sorted order), then one or more "offset:size:filename" tokens per file.
    """
    stream_name = escape(stream_name)
    stream_tokens = [stream_name]
    # Sort the file names so the same stream contents always produce the
    # same token sequence.
    sortedfiles = sorted(stream)

    # Maps each block locator to the offset at which that block starts
    # within the concatenation of all blocks listed for this stream.
    blocks = {}
    streamoffset = 0
    # Go through each file and add each referenced block exactly once.
    for streamfile in sortedfiles:
        for segment in stream[streamfile]:
            if segment.locator not in blocks:
                stream_tokens.append(segment.locator)
                blocks[segment.locator] = streamoffset
                streamoffset += segment.block_size

    # Add the empty block if the stream is otherwise empty.
    if len(stream_tokens) == 1:
        stream_tokens.append(config.EMPTY_BLOCK_LOCATOR)

    for streamfile in sortedfiles:
        # Add in file segments
        current_span = None  # [start, end) stream offsets of the pending run
        fout = escape(streamfile)
        for segment in stream[streamfile]:
            # Collapse adjacent segments
            streamoffset = blocks[segment.locator] + segment.segment_offset
            if current_span is None:
                current_span = [streamoffset, streamoffset + segment.segment_size]
            elif streamoffset == current_span[1]:
                # This segment starts exactly where the pending span ends,
                # so extend the span instead of emitting a new token.
                current_span[1] += segment.segment_size
            else:
                # Not contiguous: flush the pending span and start a new one.
                stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                current_span = [streamoffset, streamoffset + segment.segment_size]

        # Flush the last pending span for this file, if any.
        if current_span is not None:
            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))

        # A file with no segments is still listed, as a zero-length entry.
        if not stream[streamfile]:
            stream_tokens.append("0:0:{0}".format(fout))

    return stream_tokens