6203: Use faster =~ instead of match.
[arvados.git] / sdk / python / arvados / collection.py
index 3641472e18e4b51e91494c5504a46a62a418cc40..30828732d8d4908ca0922bc780c11d2a6943578e 100644 (file)
@@ -822,17 +822,30 @@ class RichCollectionBase(CollectionBase):
 
         target_dir.add(source_obj, target_name, overwrite)
 
-    @synchronized
+    def portable_manifest_text(self, stream_name="."):
+        """Get the manifest text for this collection, sub collections and files.
+
+        This method does not flush outstanding blocks to Keep.  It will return
+        a normalized manifest with access tokens stripped.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        """
+        return self._get_manifest_text(stream_name, True, True)
+
     def manifest_text(self, stream_name=".", strip=False, normalize=False):
         """Get the manifest text for this collection, sub collections and files.
 
+        This method will flush outstanding blocks to Keep.  By default, it will
+        not normalize an unmodified manifest or strip access tokens.
+
         :stream_name:
-          Name of the stream (directory)
+          Name to use for this stream (directory)
 
         :strip:
           If True, remove signing tokens from block locators if present.
-          If False (default), block locators are left unchanged.  Note: if
-          strip is False, this will also flush any open files (committing blocks to Keep).
+          If False (default), block locators are left unchanged.
 
         :normalize:
           If True, always export the manifest text in normalized form
@@ -842,10 +855,29 @@ class RichCollectionBase(CollectionBase):
 
         """
 
-        if self.modified() or self._manifest_text is None or normalize:
-            if strip is False:
-                self._my_block_manager().commit_all()
+        self._my_block_manager().commit_all()
+        return self._get_manifest_text(stream_name, strip, normalize)
+
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize):
+        """Get the manifest text for this collection, sub collections and files.
 
+        :stream_name:
+          Name to use for this stream (directory)
+
+        :strip:
+          If True, remove signing tokens from block locators if present.
+          If False (default), block locators are left unchanged.
+
+        :normalize:
+          If True, always export the manifest text in normalized form
+          even if the Collection is not modified.  If False (default) and the collection
+          is not modified, return the original manifest text even if it is not
+          in normalized form.
+
+        """
+
+        if self.modified() or self._manifest_text is None or normalize:
             stream = {}
             buf = []
             sorted_keys = sorted(self.keys())
@@ -865,7 +897,7 @@ class RichCollectionBase(CollectionBase):
             if stream:
                 buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
             for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip))
+                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True))
             return "".join(buf)
         else:
             if strip:
@@ -945,7 +977,7 @@ class RichCollectionBase(CollectionBase):
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
-        stripped = self.manifest_text(strip=True)
+        stripped = self.portable_manifest_text()
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
     @synchronized
@@ -1253,7 +1285,8 @@ class Collection(RichCollectionBase):
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
-        merge=True, the default), and update the collection record.
+        merge=True, the default), and update the collection record.  Returns
+        the current manifest text.
 
         Will raise AssertionError if not associated with a collection record on
         the API server.  If you want to save a manifest to Keep only, see
@@ -1271,6 +1304,8 @@ class Collection(RichCollectionBase):
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator must be a collection uuid.  Use save_new() for new collections.")
 
+            self._my_block_manager().commit_all()
+
             if merge:
                 self.update()
 
@@ -1283,21 +1318,31 @@ class Collection(RichCollectionBase):
             self._manifest_text = self._api_response["manifest_text"]
             self.set_unmodified()
 
+        return self._manifest_text
+
 
     @must_be_writable
     @synchronized
     @retry_method
-    def save_new(self, name=None, owner_uuid=None, ensure_unique_name=False, num_retries=None):
+    def save_new(self, name=None,
+                 create_collection_record=True,
+                 owner_uuid=None,
+                 ensure_unique_name=False,
+                 num_retries=None):
         """Save collection to a new collection record.
 
-        Commit pending buffer blocks to Keep and create a new collection record
-        (if create_collection_record True).  After creating a new collection
-        record, this Collection object will be associated with the new record
-        used by `save()`.
+        Commit pending buffer blocks to Keep and, when create_collection_record
+        is True (default), create a new collection record.  After creating a
+        new collection record, this Collection object will be associated with
+        the new record used by `save()`.  Returns the current manifest text.
 
         :name:
           The collection name.
 
+        :create_collection_record:
+           If True, create a collection record on the API server.
+           If False, only commit blocks to Keep and return the manifest text.
+
         :owner_uuid:
           the user, or project uuid that will own this collection.
           If None, defaults to the current user.
@@ -1311,23 +1356,27 @@ class Collection(RichCollectionBase):
           Retry count on API calls (if None,  use the collection default)
 
         """
+        self._my_block_manager().commit_all()
         text = self.manifest_text(strip=False)
 
-        if name is None:
-            name = "Collection created %s" % (time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()))
+        if create_collection_record:
+            if name is None:
+                name = "Collection created %s" % (time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()))
 
-        body = {"manifest_text": text,
-                "name": name}
-        if owner_uuid:
-            body["owner_uuid"] = owner_uuid
+            body = {"manifest_text": text,
+                    "name": name}
+            if owner_uuid:
+                body["owner_uuid"] = owner_uuid
 
-        self._api_response = self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)
-        text = self._api_response["manifest_text"]
+            self._api_response = self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)
+            text = self._api_response["manifest_text"]
 
-        self._manifest_locator = self._api_response["uuid"]
+            self._manifest_locator = self._api_response["uuid"]
 
-        self._manifest_text = text
-        self.set_unmodified()
+            self._manifest_text = text
+            self.set_unmodified()
+
+        return text
 
     @synchronized
     def subscribe(self, callback):
@@ -1377,7 +1426,7 @@ class Collection(RichCollectionBase):
                 block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
                 if block_locator:
                     blocksize = long(block_locator.group(1))
-                    blocks.append(Range(tok, streamoffset, blocksize))
+                    blocks.append(Range(tok, streamoffset, blocksize, 0))
                     streamoffset += blocksize
                 else:
                     state = SEGMENTS