9463: Get Collection's manifest text including only those committed blocks, so that...
authorLucas Di Pentima <lucas@curoverse.com>
Mon, 15 Aug 2016 20:45:19 +0000 (17:45 -0300)
committerLucas Di Pentima <lucas@curoverse.com>
Mon, 15 Aug 2016 20:45:19 +0000 (17:45 -0300)
sdk/python/arvados/collection.py
sdk/python/arvados/commands/put.py
sdk/python/tests/test_collections.py

index 62b6526d9432033ad700f107418a2507255922e3..56d8b239331a8f65e8b5781376719244c03b2905 100644 (file)
@@ -922,7 +922,7 @@ class RichCollectionBase(CollectionBase):
         return self._get_manifest_text(stream_name, strip, normalize)
 
     @synchronized
-    def _get_manifest_text(self, stream_name, strip, normalize):
+    def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
         """Get the manifest text for this collection, sub collections and files.
 
         :stream_name:
@@ -938,6 +938,9 @@ class RichCollectionBase(CollectionBase):
           is not modified, return the original manifest text even if it is not
           in normalized form.
 
+        :only_committed:
+          If True, only include blocks that were already committed to Keep.
+
         """
 
         if not self.committed() or self._manifest_text is None or normalize:
@@ -951,6 +954,8 @@ class RichCollectionBase(CollectionBase):
                 for segment in arvfile.segments():
                     loc = segment.locator
                     if arvfile.parent._my_block_manager().is_bufferblock(loc):
+                        if only_committed:
+                            continue
                         loc = arvfile.parent._my_block_manager().get_bufferblock(loc).locator()
                     if strip:
                         loc = KeepLocator(loc).stripped()
index 43e3813255faf02025c99acc3349bdfdc75399fc..4034173681503767d3ba45beb7da3ae940dbf237 100644 (file)
@@ -389,7 +389,7 @@ class ArvPutUploadJob(object):
             if self.resume:
                 with self._state_lock:
                     # Get the manifest text without committing pending blocks
-                    self._state['manifest'] = self._my_collection()._get_manifest_text(".", strip=False, normalize=False)
+                    self._state['manifest'] = self._my_collection()._get_manifest_text(".", strip=False, normalize=False, only_committed=True)
         if self.resume:
             self._save_state()
         # Call the reporter, if any
index 41c8c011f0ebf01b4cefa4c18117be2ed4eb024a..668f2e20439c323763c86651443faae8fa1b9c84 100644 (file)
@@ -1082,6 +1082,27 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(c1["count1.txt"].size(), 0)
 
 
+class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
+    def test_get_manifest_text_only_committed(self):
+        c = Collection()
+        with c.open("count.txt", "w") as f:
+            f.write("0123456789")
+            # Block not written to Keep yet, so only_committed=True must omit it
+            self.assertNotEqual(
+                c._get_manifest_text(".",
+                                     strip=False,
+                                     normalize=False,
+                                     only_committed=True),
+                ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
+        # With the file closed, the same call must now include the block
+        self.assertEqual(
+            c._get_manifest_text(".",
+                                 strip=False,
+                                 normalize=False,
+                                 only_committed=True),
+            ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
+
+
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}