Merge branch '6362-asset-config' refs #6362
[arvados.git] / sdk / python / arvados / arvfile.py
index 5befa193d3af91c9bd20a25d284c105729281900..7cd64aa16e46d415516f1d6179df2a7c3fb6df2f 100644 (file)
@@ -311,28 +311,50 @@ class _BufferBlock(object):
         else:
             raise AssertionError("Buffer block is not writable")
 
+    STATE_TRANSITIONS = frozenset([
+            (WRITABLE, PENDING),
+            (PENDING, COMMITTED),
+            (PENDING, ERROR),
+            (ERROR, PENDING)])
+
     @synchronized
     def set_state(self, nextstate, val=None):
+        """Move the block to `nextstate`, if that transition is allowed.
+
+        Only the (current, next) pairs listed in STATE_TRANSITIONS are
+        accepted.
+
+        :nextstate:
+          The state to enter.  Entering PENDING clears wait_for_commit;
+          entering COMMITTED or ERROR sets it.
+
+        :val:
+          Stored as the block locator when entering COMMITTED (the buffer
+          references are dropped at the same time), or as the error when
+          entering ERROR.  Not used when entering PENDING.
+
+        Raises StateChangeError for any other transition, leaving the
+        state unchanged.
+
+        """
-        if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.ERROR) or
-            (self._state == _BufferBlock.ERROR and nextstate == _BufferBlock.PENDING)):
-            self._state = nextstate
-
-            if self._state == _BufferBlock.PENDING:
-                self.wait_for_commit.clear()
-
-            if self._state == _BufferBlock.COMMITTED:
-                self._locator = val
-                self.buffer_view = None
-                self.buffer_block = None
-                self.wait_for_commit.set()
-
-            if self._state == _BufferBlock.ERROR:
-                self.error = val
-                self.wait_for_commit.set()
-        else:
+        if (self._state, nextstate) not in self.STATE_TRANSITIONS:
+            # Report the state value itself: self.state is a method, so
+            # passing it here would put the bound method in the error.
-            raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+            raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate)
+        self._state = nextstate
+
+        if self._state == _BufferBlock.PENDING:
+            self.wait_for_commit.clear()
+
+        if self._state == _BufferBlock.COMMITTED:
+            self._locator = val
+            self.buffer_view = None
+            self.buffer_block = None
+            self.wait_for_commit.set()
+
+        if self._state == _BufferBlock.ERROR:
+            self.error = val
+            self.wait_for_commit.set()
 
     @synchronized
     def state(self):
@@ -553,7 +555,7 @@ class _BlockManager(object):
         :sync:
           If `sync` is True, upload the block synchronously.
           If `sync` is False, upload the block asynchronously.  This will
-          return immediately unless if the upload queue is at capacity, in
+          return immediately unless the upload queue is at capacity, in
           which case it will wait on an upload queue slot.
 
         """
@@ -639,7 +641,6 @@ class _BlockManager(object):
             if v.owner:
                 v.owner.flush(sync=True)
 
-
     def block_prefetch(self, locator):
         """Initiate a background download of a block.
 
@@ -653,9 +654,13 @@ class _BlockManager(object):
         if not self.prefetch_enabled:
             return
 
+        if self._keep.get_from_cache(locator) is not None:
+            return
+
         with self.lock:
             if locator in self._bufferblocks:
                 return
+
         self.start_get_threads()
         self._prefetch_queue.put(locator)
 
@@ -811,19 +816,25 @@ class ArvadosFile(object):
         with self.lock:
             if size == 0 or offset >= self.size():
                 return ''
-            prefetch = locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE)
             readsegs = locators_and_ranges(self._segments, offset, size)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
 
-        for lr in prefetch:
-            self.parent._my_block_manager().block_prefetch(lr.locator)
-
+        locs = set()
         data = []
         for lr in readsegs:
             block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
-                data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
+                blockview = memoryview(block)
+                data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
+                locs.add(lr.locator)
             else:
                 break
+
+        for lr in prefetch:
+            if lr.locator not in locs:
+                self.parent._my_block_manager().block_prefetch(lr.locator)
+                locs.add(lr.locator)
+
         return ''.join(data)
 
     def _repack_writes(self, num_retries):