X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aff4a730ad890564ee05c2395c4ebb49458e3cdc..be2d7e55af1036699282d06f629805b6508b6ceb:/sdk/python/arvados/arvfile.py

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 5befa193d3..0326608a0b 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -129,7 +129,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
     @_FileLikeObjectBase._before_close
     @retry_method
     def decompress(self, decompress, size, num_retries=None):
-        for segment in self.readall(size, num_retries):
+        for segment in self.readall(size, num_retries=num_retries):
             data = decompress(segment)
             if data:
                 yield data
@@ -311,28 +311,30 @@ class _BufferBlock(object):
         else:
             raise AssertionError("Buffer block is not writable")
 
+    STATE_TRANSITIONS = frozenset([
+        (WRITABLE, PENDING),
+        (PENDING, COMMITTED),
+        (PENDING, ERROR),
+        (ERROR, PENDING)])
+
     @synchronized
     def set_state(self, nextstate, val=None):
-        if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.ERROR) or
-            (self._state == _BufferBlock.ERROR and nextstate == _BufferBlock.PENDING)):
-            self._state = nextstate
-
-            if self._state == _BufferBlock.PENDING:
-                self.wait_for_commit.clear()
-
-            if self._state == _BufferBlock.COMMITTED:
-                self._locator = val
-                self.buffer_view = None
-                self.buffer_block = None
-                self.wait_for_commit.set()
-
-            if self._state == _BufferBlock.ERROR:
-                self.error = val
-                self.wait_for_commit.set()
-        else:
-            raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+        if (self._state, nextstate) not in self.STATE_TRANSITIONS:
+            raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate)
+        self._state = nextstate
+
+        if self._state == _BufferBlock.PENDING:
+            self.wait_for_commit.clear()
+
+        if self._state == _BufferBlock.COMMITTED:
+            self._locator = val
+            self.buffer_view = None
+            self.buffer_block = None
+            self.wait_for_commit.set()
+
+        if self._state == _BufferBlock.ERROR:
+            self.error = val
+            self.wait_for_commit.set()
 
     @synchronized
     def state(self):
@@ -489,7 +491,7 @@ class _BlockManager(object):
             for i in xrange(0, self.num_put_threads):
                 thread = threading.Thread(target=self._commit_bufferblock_worker)
                 self._put_threads.append(thread)
-                thread.daemon = False
+                thread.daemon = True
                 thread.start()
 
     def _block_prefetch_worker(self):
@@ -553,7 +555,7 @@ class _BlockManager(object):
         :sync:
           If `sync` is True, upload the block synchronously.
           If `sync` is False, upload the block asynchronously. This will
-          return immediately unless if the upload queue is at capacity, in
+          return immediately unless the upload queue is at capacity, in
           which case it will wait on an upload queue slot.
 
         """
@@ -562,11 +564,17 @@ class _BlockManager(object):
         try:
             # Mark the block as PENDING so to disallow any more appends.
             block.set_state(_BufferBlock.PENDING)
         except StateChangeError as e:
-            if e.state == _BufferBlock.PENDING and sync:
-                block.wait_for_commit.wait()
-                if block.state() == _BufferBlock.ERROR:
-                    raise block.error
-            return
+            if e.state == _BufferBlock.PENDING:
+                if sync:
+                    block.wait_for_commit.wait()
+                else:
+                    return
+            if block.state() == _BufferBlock.COMMITTED:
+                return
+            elif block.state() == _BufferBlock.ERROR:
+                raise block.error
+            else:
+                raise
         if sync:
             try:
@@ -639,7 +647,6 @@ class _BlockManager(object):
             if v.owner:
                 v.owner.flush(sync=True)
 
-
     def block_prefetch(self, locator):
         """Initiate a background download of a block.
 
@@ -653,9 +660,13 @@ class _BlockManager(object):
 
         if not self.prefetch_enabled:
             return
 
+        if self._keep.get_from_cache(locator) is not None:
+            return
+
         with self.lock:
             if locator in self._bufferblocks:
                 return
 
+        self.start_get_threads()
         self._prefetch_queue.put(locator)
@@ -811,19 +822,25 @@ class ArvadosFile(object):
 
         with self.lock:
             if size == 0 or offset >= self.size():
                 return ''
-            prefetch = locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE)
             readsegs = locators_and_ranges(self._segments, offset, size)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
 
-        for lr in prefetch:
-            self.parent._my_block_manager().block_prefetch(lr.locator)
-
+        locs = set()
         data = []
         for lr in readsegs:
             block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
-                data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
+                blockview = memoryview(block)
+                data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
+                locs.add(lr.locator)
             else:
                 break
+
+        for lr in prefetch:
+            if lr.locator not in locs:
+                self.parent._my_block_manager().block_prefetch(lr.locator)
+                locs.add(lr.locator)
+
         return ''.join(data)
 
@@ -918,7 +935,7 @@ class ArvadosFile(object):
                 bb = self.parent._my_block_manager().get_bufferblock(s.locator)
                 if bb:
                     if bb.state() != _BufferBlock.COMMITTED:
-                        self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=True)
+                        self.parent._my_block_manager().commit_bufferblock(bb, sync=True)
                     to_delete.add(s.locator)
                     s.locator = bb.locator()
             for s in to_delete:
@@ -1059,7 +1076,7 @@ class ArvadosFileWriter(ArvadosFileReader):
     @retry_method
     def writelines(self, seq, num_retries=None):
        for s in seq:
-           self.write(s, num_retries)
+           self.write(s, num_retries=num_retries)
 
     @_FileLikeObjectBase._before_close
     def truncate(self, size=None):
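
The set_state() rewrite above replaces a chained boolean check with a lookup in a table of legal (current, next) state pairs. Below is a minimal, self-contained sketch of that pattern, assuming nothing beyond the Python standard library; the TinyBlock class and its attribute names are illustrative stand-ins, not the actual _BufferBlock from the Arvados SDK.

import threading

class StateChangeError(Exception):
    # Mirrors the shape of the exception used in arvfile.py: carries the state
    # we were in and the state that was requested.
    def __init__(self, message, state, nextstate):
        super(StateChangeError, self).__init__(message)
        self.state = state
        self.nextstate = nextstate

class TinyBlock(object):
    # Illustrative stand-in for _BufferBlock; not Arvados SDK code.
    WRITABLE = 0
    PENDING = 1
    COMMITTED = 2
    ERROR = 3

    # Every legal (current, next) transition; anything else is rejected.
    STATE_TRANSITIONS = frozenset([
        (WRITABLE, PENDING),
        (PENDING, COMMITTED),
        (PENDING, ERROR),
        (ERROR, PENDING)])

    def __init__(self):
        self._state = self.WRITABLE
        self.wait_for_commit = threading.Event()

    def set_state(self, nextstate, val=None):
        if (self._state, nextstate) not in self.STATE_TRANSITIONS:
            raise StateChangeError("Invalid state change from %s to %s" %
                                   (self._state, nextstate), self._state, nextstate)
        self._state = nextstate
        if nextstate == self.PENDING:
            self.wait_for_commit.clear()
        else:
            # COMMITTED or ERROR: wake anyone blocked waiting for the commit.
            self.wait_for_commit.set()

b = TinyBlock()
b.set_state(TinyBlock.PENDING)
b.set_state(TinyBlock.COMMITTED)
try:
    b.set_state(TinyBlock.PENDING)  # COMMITTED -> PENDING is not in the table
except StateChangeError as e:
    print("rejected: %s" % e)

Keeping the transition table as a class-level frozenset makes the validity check a single membership test and keeps the valid state machine visible in one place.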
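The change from thread.daemon = False to thread.daemon = True affects interpreter shutdown: a non-daemon worker blocked on an empty queue keeps the process alive at exit, while a daemon worker is torn down once the main thread finishes. The sketch below demonstrates that behavior with the standard library only; the worker and queue here are hypothetical stand-ins, not the SDK's _commit_bufferblock_worker or its put queue.

import threading
try:
    import queue              # Python 3
except ImportError:
    import Queue as queue     # Python 2, matching the era of this diff

q = queue.Queue(maxsize=2)

def worker():
    # Hypothetical stand-in for _commit_bufferblock_worker: loop forever,
    # pulling work items off the shared queue.
    while True:
        item = q.get()        # blocks indefinitely once the queue drains
        q.task_done()

t = threading.Thread(target=worker)
t.daemon = True               # as in the diff; False would block interpreter exit
t.start()

q.put("bufferblock-1")
q.put("bufferblock-2")
q.join()                      # wait until queued work has been processed
# With t.daemon = False the program would hang here at exit, because the worker
# is still blocked in q.get(); with daemon=True the process exits cleanly.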