self._api_client = api_client
self._keep_client = keep_client
self.num_retries = num_retries
- if re.match(r'[a-f0-9]{32}(\+\d+)?(\+\S+)*$', manifest_locator_or_text):
+ if re.match(util.keep_locator_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
self._manifest_text = None
- elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$', manifest_locator_or_text):
+ elif re.match(util.collection_uuid_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
self._manifest_text = None
- elif re.match(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', manifest_locator_or_text, re.MULTILINE):
+ elif re.match(util.manifest_pattern, manifest_locator_or_text):
self._manifest_text = manifest_locator_or_text
self._manifest_locator = None
else:
def _populate(self):
+ """Ensure self._streams is filled in from the collection's manifest text.
+
+ Does nothing if self._streams is already set.  When no manifest text
+ is held yet, it is fetched using self._manifest_locator:
+
+ 1. from Keep, if the locator matches util.signed_locator_pattern;
+ 2. from the API server, if step 1 did not produce manifest text;
+ 3. from Keep, if the locator matches util.keep_locator_pattern and
+    no Keep failure has been recorded yet.
+
+ Raises arvados.errors.NotFoundError if no manifest text was obtained.
+ If the API server lookup fails and the locator does not match
+ util.keep_locator_pattern, that exception is re-raised unchanged.
+ """
if self._streams is not None:
return
+ error_via_api = None
+ error_via_keep = None
+ should_try_keep = (not self._manifest_text and
+ util.keep_locator_pattern.match(
+ self._manifest_locator))
+ # A locator matching signed_locator_pattern is tried against Keep first.
+ # A failure here is recorded rather than raised, so the API server is
+ # still consulted below.
+ if (not self._manifest_text and
+ util.signed_locator_pattern.match(self._manifest_locator)):
+ try:
+ self._populate_from_keep()
+ except Exception as e:
+ error_via_keep = e
if not self._manifest_text:
try:
- # As in KeepClient itself, we must wait until the last possible
- # moment to instantiate an API client, in order to avoid
- # tripping up clients that don't have access to an API server.
- # If we do build one, make sure our Keep client uses it.
- # If instantiation fails, we'll fall back to the except clause,
- # just like any other Collection lookup failure.
- if self._api_client is None:
- self._api_client = arvados.api('v1')
- self._keep_client = None # Make a new one with the new api.
- c = self._api_client.collections().get(
- uuid=self._manifest_locator).execute(
- num_retries=self.num_retries)
- self._manifest_text = c['manifest_text']
- except Exception as error_via_api:
- if not util.keep_locator_pattern.match(
- self._manifest_locator):
+ self._populate_from_api_server()
+ except Exception as e:
+ if not should_try_keep:
raise
- # _manifest_locator is a Keep locator. Perhaps we can
- # retrieve it directly from Keep. This has a chance of
- # working if [a] the locator includes a permission
- # signature or [b] the Keep services are operating in
- # world-readable mode.
- try:
- self._manifest_text = self._my_keep().get(
- self._manifest_locator, num_retries=self.num_retries)
- except Exception as error_via_keep:
- raise arvados.errors.NotFoundError(
- ("Failed to retrieve collection '%s' " +
- "from either API server (%s) or Keep (%s)."
- ).format(
- self._manifest_locator,
- error_via_api,
- error_via_keep))
+ error_via_api = e
+ if (not self._manifest_text and
+ not error_via_keep and
+ should_try_keep):
+ # Looks like a keep locator, and we didn't already try keep above
+ try:
+ self._populate_from_keep()
+ except Exception as e:
+ error_via_keep = e
+ if not self._manifest_text:
+ # Nothing worked!
+ raise arvados.errors.NotFoundError(
+ ("Failed to retrieve collection '{}' " +
+ "from either API server ({}) or Keep ({})."
+ ).format(
+ self._manifest_locator,
+ error_via_api,
+ error_via_keep))
self._streams = [sline.split()
for sline in self._manifest_text.split("\n")
if sline]
self._current_stream_length - self._current_file_pos,
self._current_file_name])
self._current_file_pos = self._current_stream_length
+ self._current_file_name = None
def start_new_stream(self, newstreamname='.'):
self.finish_current_stream()