1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import absolute_import
def normalize_stream(stream_name, stream):
    """Take manifest stream and return a list of tokens in normalized format.

    The returned list holds, in order: the escaped stream name, each
    referenced block locator exactly once (in order of first reference
    when files are visited in sorted name order), and then one or more
    ``offset:length:filename`` tokens per file.

    :stream_name:
      The name of the stream.

    :stream:
      A dict mapping each filename to a list of `_range.LocatorAndRange`
      objects.  Only the `locator`, `block_size`, `segment_offset` and
      `segment_size` attributes of each object are read.

    :returns:
      A list of string tokens.

    """
    # Spaces are written as the escape sequence \040 in names.
    stream_tokens = [stream_name.replace(' ', '\\040')]

    # Visit files in sorted order so that the same input always yields
    # the same token sequence.
    sortedfiles = sorted(stream)

    # Maps each block locator to the offset at which that block starts
    # within the stream.
    blocks = {}
    streamoffset = 0
    # Go through each file and add each referenced block exactly once.
    for streamfile in sortedfiles:
        for segment in stream[streamfile]:
            if segment.locator not in blocks:
                stream_tokens.append(segment.locator)
                blocks[segment.locator] = streamoffset
                streamoffset += segment.block_size

    # Add the empty block if the stream is otherwise empty.
    if len(stream_tokens) == 1:
        stream_tokens.append(config.EMPTY_BLOCK_LOCATOR)

    for streamfile in sortedfiles:
        # Add in file segments
        fout = streamfile.replace(' ', '\\040')
        segments = stream[streamfile]

        if not segments:
            # A file with no segments is still listed, as a zero-length
            # range.
            stream_tokens.append("0:0:{0}".format(fout))
            continue

        # [start, end) of the contiguous range accumulated so far.
        current_span = None
        for segment in segments:
            # Collapse adjacent segments
            segment_start = blocks[segment.locator] + segment.segment_offset
            segment_end = segment_start + segment.segment_size
            if current_span is None:
                current_span = [segment_start, segment_end]
            elif segment_start == current_span[1]:
                # This segment begins exactly where the span ends: extend it.
                current_span[1] += segment.segment_size
            else:
                # Not contiguous: emit the finished span and start a new one.
                stream_tokens.append("{0}:{1}:{2}".format(
                    current_span[0], current_span[1] - current_span[0], fout))
                current_span = [segment_start, segment_end]

        # Emit the final span for this file.
        stream_tokens.append("{0}:{1}:{2}".format(
            current_span[0], current_span[1] - current_span[0], fout))

    return stream_tokens