X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/05bea2c50474edeb9d0e3fb8daaf838b58ea9a54..af2c75821456e28875dc8006efb03f38ec1a74b8:/sdk/python/arvados/collection.py diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 40b5889c31..496136ebe3 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -92,13 +92,16 @@ def normalize(collection): class CollectionReader(object): - def __init__(self, manifest_locator_or_text, api_client=None): + def __init__(self, manifest_locator_or_text, api_client=None, keep_client=None): self._api_client = api_client - self._keep_client = None + self._keep_client = keep_client if re.match(r'[a-f0-9]{32}(\+\d+)?(\+\S+)*$', manifest_locator_or_text): self._manifest_locator = manifest_locator_or_text self._manifest_text = None - elif re.match(r'(\S+)( [a-f0-9]{32}(\+\d+)(\+\S+)*)+( \d+:\d+:\S+)+\n', manifest_locator_or_text): + elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$', manifest_locator_or_text): + self._manifest_locator = manifest_locator_or_text + self._manifest_text = None + elif re.match(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', manifest_locator_or_text, re.MULTILINE): self._manifest_text = manifest_locator_or_text self._manifest_locator = None else: @@ -116,17 +119,29 @@ class CollectionReader(object): if self._streams is not None: return if not self._manifest_text: - if self._api_client is None: - self._api_client = arvados.api('v1') - if self._keep_client is None: - self._keep_client = KeepClient(api_client=self._api_client) try: + # As in KeepClient itself, we must wait until the last possible + # moment to instantiate an API client, in order to avoid + # tripping up clients that don't have access to an API server. + # If we do build one, make sure our Keep client uses it. + # If instantiation fails, we'll fall back to the except clause, + # just like any other Collection lookup failure. + if self._api_client is None: + self._api_client = arvados.api('v1') + self._keep_client = KeepClient(api_client=self._api_client) + if self._keep_client is None: + self._keep_client = KeepClient(api_client=self._api_client) c = self._api_client.collections().get( uuid=self._manifest_locator).execute() self._manifest_text = c['manifest_text'] except Exception as e: + if not util.portable_data_hash_pattern.match( + self._manifest_locator): + raise _logger.warning("API lookup failed for collection %s (%s: %s)", self._manifest_locator, type(e), str(e)) + if self._keep_client is None: + self._keep_client = KeepClient(api_client=self._api_client) self._manifest_text = self._keep_client.get(self._manifest_locator) self._streams = [] for stream_line in self._manifest_text.split("\n"): @@ -138,7 +153,7 @@ class CollectionReader(object): # now regenerate the manifest text based on the normalized stream #print "normalizing", self._manifest_text - self._manifest_text = ''.join([StreamReader(stream).manifest_text() for stream in self._streams]) + self._manifest_text = ''.join([StreamReader(stream, keep=self._keep_client).manifest_text() for stream in self._streams]) #print "result", self._manifest_text @@ -146,7 +161,7 @@ class CollectionReader(object): self._populate() resp = [] for s in self._streams: - resp.append(StreamReader(s)) + resp.append(StreamReader(s, keep=self._keep_client)) return resp def all_files(self): @@ -157,7 +172,7 @@ class CollectionReader(object): def manifest_text(self, strip=False): self._populate() if strip: - m = ''.join([StreamReader(stream).manifest_text(strip=True) for stream in self._streams]) + m = ''.join([StreamReader(stream, keep=self._keep_client).manifest_text(strip=True) for stream in self._streams]) return m else: return self._manifest_text