14259: Improvements on remote blocks copying logic.
author Lucas Di Pentima <ldipentima@veritasgenetics.com>
Thu, 8 Nov 2018 19:57:13 +0000 (16:57 -0300)
committer Lucas Di Pentima <ldipentima@veritasgenetics.com>
Thu, 8 Nov 2018 19:57:13 +0000 (16:57 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>

sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py

index f58c882e24f05d7cf929e8a25c2455f67ffc5125..3281d78e209db3a0e69726d285c59b456ea93035 100644 (file)
@@ -903,11 +903,35 @@ class ArvadosFile(object):
     @synchronized
     def has_remote_blocks(self):
         """Returns True if any of the segment's locators has a +R signature"""
+
         for s in self._segments:
             if '+R' in s.locator:
                 return True
         return False
 
+    @synchronized
+    def _copy_remote_blocks(self, remote_blocks=None):
+        """Ask Keep to copy remote blocks and point to their local copies.
+
+        This is called from the parent Collection.
+
+        :remote_blocks:
+            Shared cache of remote to local block mappings. This is used to avoid
+            doing extra work when blocks are shared by more than one file in
+            different subdirectories. When omitted, a fresh cache is created
+            for this call. The (possibly updated) cache is returned.
+        """
+
+        # A literal {} default would be created once and shared by every call
+        # that omits the argument, so build the fallback cache per call.
+        if remote_blocks is None:
+            remote_blocks = {}
+        for s in self._segments:
+            if '+R' in s.locator:
+                try:
+                    loc = remote_blocks[s.locator]
+                except KeyError:
+                    loc = self.parent._my_keep().refresh_signature(s.locator)
+                    remote_blocks[s.locator] = loc
+                s.locator = loc
+                self.parent.set_committed(False)
+        return remote_blocks
+
     @synchronized
     def segments(self):
         return copy.copy(self._segments)
index d63e9424ee45a6e28d2d353e17a62a15aa8708b6..65e48927ce5678e39c7d2496e074dd4958eeacda 100644 (file)
@@ -549,11 +549,19 @@ class RichCollectionBase(CollectionBase):
     def has_remote_blocks(self):
         """Recursively check for a +R segment locator signature."""
 
+        if self._has_remote_blocks:
+            return True
         for item in self:
             if self[item].has_remote_blocks():
                 return True
         return False
 
+    @synchronized
+    def set_has_remote_blocks(self, val):
+        """Store val as this collection's remote-blocks flag and pass it upward.
+
+        The same value is forwarded to the parent (when there is a truthy
+        one), which in turn forwards it to its own parent.
+        """
+        self._has_remote_blocks = val
+        parent = self.parent
+        if parent:
+            parent.set_has_remote_blocks(val)
+
     @must_be_writable
     @synchronized
     def find_or_create(self, path, create_type):
@@ -842,6 +850,8 @@ class RichCollectionBase(CollectionBase):
 
         self._items[target_name] = item
         self.set_committed(False)
+        if not self._has_remote_blocks and source_obj.has_remote_blocks():
+            self.set_has_remote_blocks(True)
 
         if modified_from:
             self.notify(MOD, self, target_name, (modified_from, item))
@@ -911,8 +921,6 @@ class RichCollectionBase(CollectionBase):
 
         source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
         target_dir.add(source_obj, target_name, overwrite, False)
-        if not self._has_remote_blocks and source_obj.has_remote_blocks():
-            self._has_remote_blocks = True
 
     @must_be_writable
     @synchronized
@@ -939,8 +947,6 @@ class RichCollectionBase(CollectionBase):
         if not source_obj.writable():
             raise IOError(errno.EROFS, "Source collection is read only", source)
         target_dir.add(source_obj, target_name, overwrite, True)
-        if not self._has_remote_blocks and source_obj.has_remote_blocks():
-            self._has_remote_blocks = True
 
     def portable_manifest_text(self, stream_name="."):
         """Get the manifest text for this collection, sub collections and files.
@@ -1052,18 +1058,7 @@ class RichCollectionBase(CollectionBase):
 
         """
         for item in self:
-            if isinstance(self[item], ArvadosFile):
-                for s in self[item].segments():
-                    if '+R' in s.locator:
-                        try:
-                            loc = remote_blocks[s.locator]
-                        except KeyError:
-                            loc = self._my_keep().refresh_signature(s.locator)
-                            remote_blocks[s.locator] = loc
-                        s.locator = loc
-                        self.set_committed(False)
-            elif isinstance(self[item], RichCollectionBase):
-                remote_blocks = self[item]._copy_remote_blocks(remote_blocks)
+            remote_blocks = self[item]._copy_remote_blocks(remote_blocks)
         return remote_blocks
 
     @synchronized