Merge branch 'master' into 6518-crunch2-dispatch-slurm
[arvados.git] / sdk / python / arvados / arvfile.py
index ffe3d35b66b7af1e2eefe83386c2ac17d5a6c979..71af6445a5ec508f1953777871a345c2d3e03ca1 100644 (file)
@@ -129,7 +129,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
     @_FileLikeObjectBase._before_close
     @retry_method
     def decompress(self, decompress, size, num_retries=None):
-        for segment in self.readall(size, num_retries):
+        for segment in self.readall(size, num_retries=num_retries):
             data = decompress(segment)
             if data:
                 yield data
@@ -311,28 +311,30 @@ class _BufferBlock(object):
         else:
             raise AssertionError("Buffer block is not writable")
 
+    STATE_TRANSITIONS = frozenset([
+            (WRITABLE, PENDING),
+            (PENDING, COMMITTED),
+            (PENDING, ERROR),
+            (ERROR, PENDING)])
+
     @synchronized
     def set_state(self, nextstate, val=None):
-        if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.ERROR) or
-            (self._state == _BufferBlock.ERROR and nextstate == _BufferBlock.PENDING)):
-            self._state = nextstate
-
-            if self._state == _BufferBlock.PENDING:
-                self.wait_for_commit.clear()
-
-            if self._state == _BufferBlock.COMMITTED:
-                self._locator = val
-                self.buffer_view = None
-                self.buffer_block = None
-                self.wait_for_commit.set()
-
-            if self._state == _BufferBlock.ERROR:
-                self.error = val
-                self.wait_for_commit.set()
-        else:
-            raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+        if (self._state, nextstate) not in self.STATE_TRANSITIONS:
+            raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate)
+        self._state = nextstate
+
+        if self._state == _BufferBlock.PENDING:
+            self.wait_for_commit.clear()
+
+        if self._state == _BufferBlock.COMMITTED:
+            self._locator = val
+            self.buffer_view = None
+            self.buffer_block = None
+            self.wait_for_commit.set()
+
+        if self._state == _BufferBlock.ERROR:
+            self.error = val
+            self.wait_for_commit.set()
 
     @synchronized
     def state(self):
@@ -489,7 +491,7 @@ class _BlockManager(object):
             for i in xrange(0, self.num_put_threads):
                 thread = threading.Thread(target=self._commit_bufferblock_worker)
                 self._put_threads.append(thread)
-                thread.daemon = False
+                thread.daemon = True
                 thread.start()
 
     def _block_prefetch_worker(self):
@@ -541,9 +543,6 @@ class _BlockManager(object):
     def __exit__(self, exc_type, exc_value, traceback):
         self.stop_threads()
 
-    def __del__(self):
-        self.stop_threads()
-
     def commit_bufferblock(self, block, sync):
         """Initiate a background upload of a bufferblock.
 
@@ -562,11 +561,17 @@ class _BlockManager(object):
             # Mark the block as PENDING so to disallow any more appends.
             block.set_state(_BufferBlock.PENDING)
         except StateChangeError as e:
-            if e.state == _BufferBlock.PENDING and sync:
-                block.wait_for_commit.wait()
-                if block.state() == _BufferBlock.ERROR:
-                    raise block.error
-            return
+            if e.state == _BufferBlock.PENDING:
+                if sync:
+                    block.wait_for_commit.wait()
+                else:
+                    return
+            if block.state() == _BufferBlock.COMMITTED:
+                return
+            elif block.state() == _BufferBlock.ERROR:
+                raise block.error
+            else:
+                raise
 
         if sync:
             try:
@@ -639,7 +644,6 @@ class _BlockManager(object):
             if v.owner:
                 v.owner.flush(sync=True)
 
-
     def block_prefetch(self, locator):
         """Initiate a background download of a block.
 
@@ -653,9 +657,13 @@ class _BlockManager(object):
         if not self.prefetch_enabled:
             return
 
+        if self._keep.get_from_cache(locator) is not None:
+            return
+
         with self.lock:
             if locator in self._bufferblocks:
                 return
+
         self.start_get_threads()
         self._prefetch_queue.put(locator)
 
@@ -811,19 +819,25 @@ class ArvadosFile(object):
         with self.lock:
             if size == 0 or offset >= self.size():
                 return ''
-            prefetch = locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE)
             readsegs = locators_and_ranges(self._segments, offset, size)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
 
-        for lr in prefetch:
-            self.parent._my_block_manager().block_prefetch(lr.locator)
-
+        locs = set()
         data = []
         for lr in readsegs:
             block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
-                data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
+                blockview = memoryview(block)
+                data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
+                locs.add(lr.locator)
             else:
                 break
+
+        for lr in prefetch:
+            if lr.locator not in locs:
+                self.parent._my_block_manager().block_prefetch(lr.locator)
+                locs.add(lr.locator)
+
         return ''.join(data)
 
     def _repack_writes(self, num_retries):
@@ -918,7 +932,7 @@ class ArvadosFile(object):
                 bb = self.parent._my_block_manager().get_bufferblock(s.locator)
                 if bb:
                     if bb.state() != _BufferBlock.COMMITTED:
-                        self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=True)
+                        self.parent._my_block_manager().commit_bufferblock(bb, sync=True)
                     to_delete.add(s.locator)
                     s.locator = bb.locator()
             for s in to_delete:
@@ -1059,7 +1073,7 @@ class ArvadosFileWriter(ArvadosFileReader):
     @retry_method
     def writelines(self, seq, num_retries=None):
         for s in seq:
-            self.write(s, num_retries)
+            self.write(s, num_retries=num_retries)
 
     @_FileLikeObjectBase._before_close
     def truncate(self, size=None):