8784: Fix test for latest firefox.
[arvados.git] / sdk / python / arvados / _normalize_stream.py
1 from __future__ import absolute_import
2 from . import config
3
def normalize_stream(stream_name, stream):
    """Build the normalized manifest token list for one stream.

    The result starts with the stream name, followed by every distinct
    block locator in first-use order (walking files in sorted name order),
    followed by one ``offset:length:filename`` token per contiguous span
    of each file.  Spaces in the stream name and in file names are written
    as ``\\040``.

    :stream_name:
      The name of the stream.

    :stream:
      A dict mapping each filename to a list of `_range.LocatorAndRange` objects.

    """

    tokens = [stream_name.replace(' ', '\\040')]
    filenames = sorted(stream.keys())
    span_token = "{0}:{1}:{2}".format

    # First pass: emit each distinct block locator once and remember the
    # offset within the stream at which that block begins.
    block_start = {}
    next_start = 0
    for filename in filenames:
        for seg in stream[filename]:
            if seg.locator in block_start:
                continue
            tokens.append(seg.locator)
            block_start[seg.locator] = next_start
            next_start += seg.block_size

    # Only the stream name is present so far, meaning no file referenced
    # any block; fall back to the empty block locator.
    if len(tokens) == 1:
        tokens.append(config.EMPTY_BLOCK_LOCATOR)

    # Second pass: emit the file spans, merging segments that are adjacent
    # in stream coordinates into a single token.
    for filename in filenames:
        escaped = filename.replace(' ', '\\040')
        span_begin = None
        span_end = None
        for seg in stream[filename]:
            seg_begin = block_start[seg.locator] + seg.segment_offset
            if span_begin is not None and seg_begin != span_end:
                # This segment does not continue the open span: flush it.
                tokens.append(span_token(span_begin, span_end - span_begin, escaped))
                span_begin = None
            if span_begin is None:
                span_begin = seg_begin
                span_end = seg_begin
            span_end += seg.segment_size

        if span_begin is not None:
            tokens.append(span_token(span_begin, span_end - span_begin, escaped))

        # A file with no segments at all is still listed, as a
        # zero-length entry.
        if not stream[filename]:
            tokens.append("0:0:{0}".format(escaped))

    return tokens