X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/05ff1098f0e9eda5d642a1249f8b3a236656320c..6617a2ba4323d2f47566c89961763625fce2e1ca:/sdk/python/arvados/arvfile.py diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py index 83ef76bcab..b78c63e301 100644 --- a/sdk/python/arvados/arvfile.py +++ b/sdk/python/arvados/arvfile.py @@ -108,6 +108,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase): cache_pos, cache_data = self._readline_cache if self.tell() == cache_pos: data = [cache_data] + self._filepos += len(cache_data) else: data = [''] data_size = len(data[-1]) @@ -123,13 +124,14 @@ class ArvadosFileReaderBase(_FileLikeObjectBase): except ValueError: nextline_index = len(data) nextline_index = min(nextline_index, size) + self._filepos -= len(data) - nextline_index self._readline_cache = (self.tell(), data[nextline_index:]) return data[:nextline_index] @_FileLikeObjectBase._before_close @retry_method def decompress(self, decompress, size, num_retries=None): - for segment in self.readall(size, num_retries): + for segment in self.readall(size, num_retries=num_retries): data = decompress(segment) if data: yield data @@ -311,28 +313,30 @@ class _BufferBlock(object): else: raise AssertionError("Buffer block is not writable") + STATE_TRANSITIONS = frozenset([ + (WRITABLE, PENDING), + (PENDING, COMMITTED), + (PENDING, ERROR), + (ERROR, PENDING)]) + @synchronized def set_state(self, nextstate, val=None): - if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or - (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED) or - (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.ERROR) or - (self._state == _BufferBlock.ERROR and nextstate == _BufferBlock.PENDING)): - self._state = nextstate - - if self._state == _BufferBlock.PENDING: - self.wait_for_commit.clear() - - if self._state == _BufferBlock.COMMITTED: - self._locator = val - self.buffer_view = None - self.buffer_block = None - self.wait_for_commit.set() - - if self._state == _BufferBlock.ERROR: - self.error = val - self.wait_for_commit.set() - else: - raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate) + if (self._state, nextstate) not in self.STATE_TRANSITIONS: + raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate) + self._state = nextstate + + if self._state == _BufferBlock.PENDING: + self.wait_for_commit.clear() + + if self._state == _BufferBlock.COMMITTED: + self._locator = val + self.buffer_view = None + self.buffer_block = None + self.wait_for_commit.set() + + if self._state == _BufferBlock.ERROR: + self.error = val + self.wait_for_commit.set() @synchronized def state(self): @@ -489,7 +493,7 @@ class _BlockManager(object): for i in xrange(0, self.num_put_threads): thread = threading.Thread(target=self._commit_bufferblock_worker) self._put_threads.append(thread) - thread.daemon = False + thread.daemon = True thread.start() def _block_prefetch_worker(self): @@ -541,9 +545,6 @@ class _BlockManager(object): def __exit__(self, exc_type, exc_value, traceback): self.stop_threads() - def __del__(self): - self.stop_threads() - def commit_bufferblock(self, block, sync): """Initiate a background upload of a bufferblock. @@ -562,11 +563,17 @@ class _BlockManager(object): # Mark the block as PENDING so to disallow any more appends. block.set_state(_BufferBlock.PENDING) except StateChangeError as e: - if e.state == _BufferBlock.PENDING and sync: - block.wait_for_commit.wait() - if block.state() == _BufferBlock.ERROR: - raise block.error - return + if e.state == _BufferBlock.PENDING: + if sync: + block.wait_for_commit.wait() + else: + return + if block.state() == _BufferBlock.COMMITTED: + return + elif block.state() == _BufferBlock.ERROR: + raise block.error + else: + raise if sync: try: @@ -639,7 +646,6 @@ class _BlockManager(object): if v.owner: v.owner.flush(sync=True) - def block_prefetch(self, locator): """Initiate a background download of a block. @@ -653,9 +659,13 @@ class _BlockManager(object): if not self.prefetch_enabled: return + if self._keep.get_from_cache(locator) is not None: + return + with self.lock: if locator in self._bufferblocks: return + self.start_get_threads() self._prefetch_queue.put(locator) @@ -811,20 +821,25 @@ class ArvadosFile(object): with self.lock: if size == 0 or offset >= self.size(): return '' - prefetch = locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE) readsegs = locators_and_ranges(self._segments, offset, size) + prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32) - for lr in prefetch: - self.parent._my_block_manager().block_prefetch(lr.locator) - + locs = set() data = [] for lr in readsegs: block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact)) if block: blockview = memoryview(block) data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes()) + locs.add(lr.locator) else: break + + for lr in prefetch: + if lr.locator not in locs: + self.parent._my_block_manager().block_prefetch(lr.locator) + locs.add(lr.locator) + return ''.join(data) def _repack_writes(self, num_retries): @@ -919,7 +934,7 @@ class ArvadosFile(object): bb = self.parent._my_block_manager().get_bufferblock(s.locator) if bb: if bb.state() != _BufferBlock.COMMITTED: - self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=True) + self.parent._my_block_manager().commit_bufferblock(bb, sync=True) to_delete.add(s.locator) s.locator = bb.locator() for s in to_delete: @@ -1060,7 +1075,7 @@ class ArvadosFileWriter(ArvadosFileReader): @retry_method def writelines(self, seq, num_retries=None): for s in seq: - self.write(s, num_retries) + self.write(s, num_retries=num_retries) @_FileLikeObjectBase._before_close def truncate(self, size=None):