14539: Removes pointless code comment.
[arvados.git] / sdk / python / arvados / _normalize_stream.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from . import config
7
8 import re
9
def escape(path):
    """Return `path` with backslashes, colons and low characters octal-escaped.

    Two passes are made over the string, in this order:

    1. Every literal backslash becomes a backslash followed by ``134``.
    2. Every colon, and every character whose code lies in the range
       octal 000 through octal 040 (space), becomes a backslash followed
       by the character's three-digit octal code.

    Backslashes are handled first so that the backslashes introduced by
    the second pass are not escaped again.
    """
    def _backslash_code(match):
        # Fixed replacement text for a literal backslash.
        return '\\134'

    def _octal_code(match):
        # Backslash plus the zero-padded three-digit octal character code.
        return "\\%03o" % ord(match.group(1))

    without_backslashes = re.sub('\\\\', _backslash_code, path)
    return re.sub('([:\000-\040])', _octal_code, without_backslashes)
15
def normalize_stream(stream_name, stream):
    """Build the list of normalized manifest tokens for a single stream.

    :stream_name:
      The name of the stream.  It is escaped and becomes the first token.

    :stream:
      A dict mapping each filename to a list of `_range.LocatorAndRange`
      objects.  Each object is read for its `locator`, `block_size`,
      `segment_offset` and `segment_size` attributes.

    The returned list holds, in order: the escaped stream name, each
    distinct block locator in first-seen order (files are visited in
    sorted filename order), and then one or more ``start:length:name``
    tokens per file.

    """
    tokens = [escape(stream_name)]
    filenames = sorted(stream.keys())

    # Pass 1: list every distinct block once and remember the offset at
    # which it begins within the concatenated stream.
    block_starts = {}
    next_block_start = 0
    for filename in filenames:
        for seg in stream[filename]:
            if seg.locator in block_starts:
                continue
            tokens.append(seg.locator)
            block_starts[seg.locator] = next_block_start
            next_block_start += seg.block_size

    # Only the stream name was emitted, so no blocks were referenced:
    # add the empty block locator.
    if len(tokens) == 1:
        tokens.append(config.EMPTY_BLOCK_LOCATOR)

    # Pass 2: emit the file tokens, merging segments that are contiguous
    # in the stream into a single span.
    for filename in filenames:
        escaped_name = escape(filename)
        span_start = None
        span_end = None
        for seg in stream[filename]:
            seg_start = block_starts[seg.locator] + seg.segment_offset
            if span_start is not None and seg_start == span_end:
                # Segment begins exactly where the open span ends: extend it.
                span_end += seg.segment_size
                continue
            if span_start is not None:
                # Gap or jump: flush the open span before starting a new one.
                tokens.append("{0}:{1}:{2}".format(span_start, span_end - span_start, escaped_name))
            span_start = seg_start
            span_end = seg_start + seg.segment_size

        if span_start is not None:
            tokens.append("{0}:{1}:{2}".format(span_start, span_end - span_start, escaped_name))

        # A file with no segments is emitted as a zero-length token.
        if not stream[filename]:
            tokens.append("0:0:{0}".format(escaped_name))

    return tokens