3706: Consolidate more regular expressions into util package.
[arvados.git] / sdk / python / arvados / collection.py
index 7425d80171c929c4913a66dbe4401e13850dc003..874c38e79e7211de89e8f32983c6476b13038eaf 100644 (file)
@@ -130,13 +130,13 @@ class CollectionReader(CollectionBase):
         self._api_client = api_client
         self._keep_client = keep_client
         self.num_retries = num_retries
-        if re.match(r'[a-f0-9]{32}(\+\d+)?(\+\S+)*$', manifest_locator_or_text):
+        if re.match(util.keep_locator_pattern, manifest_locator_or_text):
             self._manifest_locator = manifest_locator_or_text
             self._manifest_text = None
-        elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$', manifest_locator_or_text):
+        elif re.match(util.collection_uuid_pattern, manifest_locator_or_text):
             self._manifest_locator = manifest_locator_or_text
             self._manifest_text = None
-        elif re.match(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', manifest_locator_or_text, re.MULTILINE):
+        elif re.match(util.manifest_pattern, manifest_locator_or_text):
             self._manifest_text = manifest_locator_or_text
             self._manifest_locator = None
         else:
@@ -147,41 +147,41 @@ class CollectionReader(CollectionBase):
     def _populate(self):
         if self._streams is not None:
             return
+        error_via_api = None
+        error_via_keep = None
+        should_try_keep = (not self._manifest_text and
+                           util.keep_locator_pattern.match(
+                self._manifest_locator))
+        if (not self._manifest_text and
+            util.signed_locator_pattern.match(self._manifest_locator)):
+            try:
+                self._populate_from_keep()
+            except e:
+                error_via_keep = e
         if not self._manifest_text:
             try:
-                # As in KeepClient itself, we must wait until the last possible
-                # moment to instantiate an API client, in order to avoid
-                # tripping up clients that don't have access to an API server.
-                # If we do build one, make sure our Keep client uses it.
-                # If instantiation fails, we'll fall back to the except clause,
-                # just like any other Collection lookup failure.
-                if self._api_client is None:
-                    self._api_client = arvados.api('v1')
-                    self._keep_client = None  # Make a new one with the new api.
-                c = self._api_client.collections().get(
-                    uuid=self._manifest_locator).execute(
-                    num_retries=self.num_retries)
-                self._manifest_text = c['manifest_text']
-            except Exception as error_via_api:
-                if not util.keep_locator_pattern.match(
-                      self._manifest_locator):
+                self._populate_from_api_server()
+            except Exception as e:
+                if not should_try_keep:
                     raise
-                # _manifest_locator is a Keep locator. Perhaps we can
-                # retrieve it directly from Keep. This has a chance of
-                # working if [a] the locator includes a permission
-                # signature or [b] the Keep services are operating in
-                # world-readable mode.
-                try:
-                    self._manifest_text = self._my_keep().get(
-                        self._manifest_locator, num_retries=self.num_retries)
-                except Exception as error_via_keep:
-                    raise arvados.errors.NotFoundError(
-                        ("Failed to retrieve collection '%s' " +
-                         "from either API server (%s) or Keep (%s)."
-                         ).format(
-                                self._manifest_locator,
-                                error_via_api,
-                                error_via_keep))
+                error_via_api = e
+        if (not self._manifest_text and
+            not error_via_keep and
+            should_try_keep):
+            # Looks like a keep locator, and we didn't already try keep above
+            try:
+                self._populate_from_keep()
+            except Exception as e:
+                error_via_keep = e
+        if not self._manifest_text:
+            # Nothing worked!
+            raise arvados.errors.NotFoundError(
+                ("Failed to retrieve collection '{}' " +
+                 "from either API server ({}) or Keep ({})."
+                 ).format(
+                    self._manifest_locator,
+                    error_via_api,
+                    error_via_keep))
         self._streams = [sline.split()
                          for sline in self._manifest_text.split("\n")
                          if sline]
@@ -392,6 +392,7 @@ class CollectionWriter(CollectionBase):
                 self._current_stream_length - self._current_file_pos,
                 self._current_file_name])
         self._current_file_pos = self._current_stream_length
+        self._current_file_name = None
 
     def start_new_stream(self, newstreamname='.'):
         self.finish_current_stream()