+import util
+
def normalize_stream(s, stream):
    """Build the normalized token list for a single stream.

    Arguments:
      s -- the stream name; it becomes the first token of the result.
      stream -- mapping of file name to a list of segment records.  Each
        record is indexed with arvados.LOCATOR, arvados.BLOCKSIZE,
        arvados.OFFSET and arvados.SEGMENTSIZE (the record layout itself is
        defined outside this function).

    Returns a list of tokens: the stream name, then each distinct block
    locator in order of first use (files visited in sorted name order),
    then one "start:length:name" token per contiguous span of each file.
    Spaces in file names are written as the literal text "\\040".  A file
    with no segments contributes a single "0:0:name" token.
    """
    stream_tokens = [s]
    sortedfiles = sorted(stream.keys())

    # Pass 1: emit every distinct block locator once and remember the offset
    # at which that block starts within the concatenated stream.
    blocks = {}
    # Plain int instead of the Python 2-only ``0L`` literal, which is a
    # syntax error on Python 3; Python 2 promotes int to long on overflow.
    streamoffset = 0
    for f in sortedfiles:
        for b in stream[f]:
            locator = b[arvados.LOCATOR]
            if locator not in blocks:
                stream_tokens.append(locator)
                blocks[locator] = streamoffset
                streamoffset += b[arvados.BLOCKSIZE]

    # NOTE(review): when no file has any segment, the result contains no
    # block locator at all -- confirm that consumers of these tokens accept
    # such a stream.

    # Pass 2: translate each file's segments into stream-relative spans,
    # merging segments that are adjacent in the stream.
    for f in sortedfiles:
        current_span = None
        fout = f.replace(' ', '\\040')
        for segment in stream[f]:
            segmentoffset = blocks[segment[arvados.LOCATOR]] + segment[arvados.OFFSET]
            segmentend = segmentoffset + segment[arvados.SEGMENTSIZE]
            if current_span is None:
                current_span = [segmentoffset, segmentend]
            elif segmentoffset == current_span[1]:
                # Segment begins exactly where the current span ends: grow it.
                current_span[1] += segment[arvados.SEGMENTSIZE]
            else:
                # Discontinuity: flush the finished span and start a new one.
                stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                current_span = [segmentoffset, segmentend]

        if current_span is not None:
            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))

        if len(stream[f]) == 0:
            # Zero-length file: still needs a token so the name is preserved.
            stream_tokens.append("0:0:{0}".format(fout))

    return stream_tokens
+
def normalize(collection):
    """Regroup all files of *collection* by stream and normalize each stream.

    Every file's full path is formed as "<stream name>/<file name>" and then
    split at its last "/", so any directory part inside a file name becomes
    part of the stream name.  For each file, the ranges returned by the
    stream's locators_and_ranges() for each of the file's segments are
    collected in order.

    Returns a list with one normalize_stream() result per stream name, in
    sorted stream-name order.
    """
    by_stream = {}
    for stream in collection.all_streams():
        for entry in stream.all_files():
            full_path = stream.name() + "/" + entry.name()
            # The joined path always contains "/", so rpartition splits at
            # the last one exactly like rindex-based slicing would.
            stream_name, _, file_name = full_path.rpartition("/")
            # Register the file even if it turns out to have no segments.
            ranges = by_stream.setdefault(stream_name, {}).setdefault(file_name, [])
            for seg in entry.segments:
                # seg[0], seg[1] are passed through unchanged; presumably
                # (offset, size) -- confirm against locators_and_ranges().
                ranges.extend(stream.locators_and_ranges(seg[0], seg[1]))

    return [normalize_stream(name, by_stream[name])
            for name in sorted(by_stream.keys())]
+