def locators_and_ranges(data_locators, range_start, range_size):
    """Return the block segments that together cover a byte range.

    data_locators is an iterable of (locator, block_size, block_start)
    entries, ordered by ascending block_start; the scan stops at the first
    block that begins at or after the end of the range.

    range_start and range_size are converted with int(), so numeric strings
    are accepted.

    Returns a list of [locator, block_size, segment_offset, segment_size]
    entries, one per block that overlaps the range by at least one byte.
    segment_offset is relative to the start of that block. An empty or
    negative range, or a range that overlaps no block, yields [].
    """
    # int() promotes to long automatically on Python 2 and is the only
    # integer type on Python 3, so it is a portable stand-in for long().
    range_start = int(range_start)
    range_size = int(range_size)
    range_end = range_start + range_size

    resp = []
    if range_size <= 0:
        # Nothing to cover; never report zero-length segments.
        return resp

    for locator, block_size, block_start in data_locators:
        block_end = block_start + block_size
        if range_end <= block_start:
            # Range ends at or before the start of this block, so no later
            # block can overlap it either.
            break
        if range_start >= block_end:
            # Range starts at or after the end of this block: try the next.
            continue
        # Clamp the range to this block; the comparisons above guarantee the
        # overlap is at least one byte long.
        segment_start = max(range_start, block_start)
        segment_end = min(range_end, block_end)
        resp.append([locator, block_size,
                     segment_start - block_start,
                     segment_end - segment_start])
    return resp
# Reader for one named file inside a stream. It is constructed with the owning
# stream object, a list of segments and the file name.
# NOTE(review): every line in this listing starts with a viewer line number and
# the numbers skip, so some statements (including several "def" headers) are
# not visible here; the comments below describe only what is shown.
61 class StreamFileReader(object):
62 def __init__(self, stream, segments, name):
# List of segments making up this file. StreamReader creates it as
# [[pos, size, 0]] and later appends [pos, size, n.size()] entries.
64 self.segments = segments
71 def decompressed_name(self):
# Return the file name with a trailing ".bz2" or ".gz" extension removed.
# NOTE(review): '\.' inside a non-raw string literal is an unrecognized escape;
# a raw string (r'...') would spell the same pattern without relying on that.
72 return re.sub('\.(bz2|gz)$', '', self._name)
74 def stream_name(self):
# Delegates to the name() method of the owning stream.
75 return self._stream.name()
# Store the requested position, clamped to the range 0..self.size().
78 self._filepos = min(max(pos, 0L), self.size())
# Sum of the OFFSET and BLOCKSIZE fields of n (both index names come from
# outside this class). presumably n is the last entry of self.segments, which
# would make this the total file size -- its assignment is not visible; confirm.
85 return n[OFFSET] + n[BLOCKSIZE]
88 """Read up to 'size' bytes from the stream, starting at the current file position"""
# Map the file range [self._filepos, self._filepos + size) onto self.segments.
# For a file segment the first field ("locator" here) is a position in the
# stream, so locator+segmentoffset is the stream position to read from.
93 for locator, blocksize, segmentoffset, segmentsize in locators_and_ranges(self.segments, self._filepos, size):
94 self._stream.seek(locator+segmentoffset)
95 data += self._stream.read(segmentsize)
# NOTE(review): indentation is lost in this listing. If this statement runs
# inside the loop above, len(data) is the cumulative length, so a read that
# spans several segments would advance the position too far -- confirm.
96 self._filepos += len(data)
# Reads through self.read() using at most `size` bytes per call (default
# 2**20). The surrounding loop and what is produced are not visible here;
# bunzip2() and gunzip() iterate over the result.
99 def readall(self, size=2**20):
101 data = self.read(size)
# Pass every chunk from readall(size) through one shared BZ2Decompressor.
106 def bunzip2(self, size):
107 decompressor = bz2.BZ2Decompressor()
108 for segment in self.readall(size):
109 data = decompressor.decompress(segment)
# The second comparison is redundant: an empty string is already falsy.
110 if data and data != '':
# Pass every chunk from readall(size) through one shared zlib decompressor.
113 def gunzip(self, size):
# Per the zlib documentation, wbits of 16+MAX_WBITS selects the gzip container
# (header and trailer) rather than a raw or zlib stream.
114 decompressor = zlib.decompressobj(16+zlib.MAX_WBITS)
115 for segment in self.readall(size):
# Prepend whatever input the previous decompress() call left unconsumed.
116 data = decompressor.decompress(decompressor.unconsumed_tail + segment)
# The second comparison is redundant: an empty string is already falsy.
117 if data and data != '':
# Choose a reader from the file name's extension: ".bz2" -> bunzip2(),
# ".gz" -> gunzip(). presumably the final return is the fallback for any other
# name -- the branch line before it is not visible; confirm.
# NOTE(review): both patterns use '\.' in a non-raw string literal (see
# decompressed_name).
120 def readall_decompressed(self, size=2**20):
122 if re.search('\.bz2$', self._name):
123 return self.bunzip2(size)
124 elif re.search('\.gz$', self._name):
125 return self.gunzip(size)
127 return self.readall(size)
# Yields the file's content split into lines. presumably `decompress` selects
# between the two datasource assignments below -- the branch lines are not
# visible; confirm.
129 def readlines(self, decompress=True):
131 datasource = self.readall_decompressed()
# NOTE(review): no assignment to self._pos is visible in this class (seek()
# stores self._filepos) -- confirm this attribute exists.
133 self._stream.seek(self._pos + self._filepos)
134 datasource = self.readall()
136 for newdata in datasource:
# Index of the next "\n" in data at or after position sol (string.find gives -1
# when there is none).
140 eol = string.find(data, "\n", sol)
# Emit one line including its terminating newline.
143 yield data[sol:eol+1]
# Reader for one stream described by a sequence of tokens: the first token is
# taken as the stream name, tokens matching the block-locator pattern are
# recorded in data_locators, and "pos:size:name" tokens create or extend
# StreamFileReader objects stored in self.files.
# NOTE(review): every line in this listing starts with a viewer line number and
# the numbers skip, so some statements (including several "def" headers and
# if/else lines) are not visible here; the comments below describe only what is
# shown.
150 class StreamReader(object):
151 def __init__(self, tokens):
152 self._tokens = tokens
155 self._stream_name = None
# Entries are [locator token, block size, offset of the block in the stream].
156 self.data_locators = []
161 for tok in self._tokens:
# No name stored yet, so this token is the stream name.
162 if self._stream_name == None:
# The literal four characters \040 in a token stand for a space.
163 self._stream_name = tok.replace('\\040', ' ')
# Block locator token: 32 lowercase hex digits, "+<size>", then any number of
# further "+..." parts.
166 s = re.match(r'^[0-9a-f]{32}\+(\d+)(\+\S+)*$', tok)
168 blocksize = long(s.group(1))
169 self.data_locators.append([tok, blocksize, streamoffset])
# The next block starts where this one ends.
170 streamoffset += blocksize
# File token: "<pos>:<size>:<name>".
173 s = re.search(r'^(\d+):(\d+):(\S+)', tok)
175 pos = long(s.group(1))
176 size = long(s.group(2))
177 name = s.group(3).replace('\\040', ' ')
178 if name not in self.files:
# First segment of a new file: it starts at offset 0 within the file.
179 self.files[name] = StreamFileReader(self, [[pos, size, 0]], name)
# Further segment of a known file: its third field is the file's size so far.
182 n.segments.append([pos, size, n.size()])
# presumably reached only when the token matched neither pattern -- the control
# flow between these lines is not visible; confirm.
185 raise errors.SyntaxError("Invalid manifest format")
191 return self._stream_name
194 return self.files.values()
197 """Set the position of the next read operation."""
# Last recorded block entry; the stream size is derived from it below.
204 n = self.data_locators[-1]
# NOTE(review): OFFSET and BLOCKSIZE are looked up on self here, while
# StreamFileReader indexes with the bare names OFFSET and BLOCKSIZE. No such
# attributes are visibly defined on this class -- confirm they exist.
205 return n[self.OFFSET] + n[self.BLOCKSIZE]
# Delegates to the module-level locators_and_ranges() with this stream's block
# list.
207 def locators_and_ranges(self, range_start, range_size):
208 return locators_and_ranges(self.data_locators, range_start, range_size)
210 def read(self, size):
211 """Read up to 'size' bytes from the stream, starting at the current file position"""
# For each block overlapping [self._pos, self._pos + size), fetch the block
# with Keep.get(locator) and keep only the requested slice of it.
215 for locator, blocksize, segmentoffset, segmentsize in locators_and_ranges(self.data_locators, self._pos, size):
216 data += Keep.get(locator)[segmentoffset:segmentoffset+segmentsize]
# Advance the stream position by the length of data.
# NOTE(review): indentation is lost in this listing; if this runs inside the
# loop above, len(data) is cumulative and the position would over-advance --
# confirm.
217 self._pos += len(data)