From: Tom Clegg Date: Fri, 20 Oct 2017 02:30:35 +0000 (-0400) Subject: 11220: Don't bother trying to read manifests from Keep. X-Git-Tag: 1.1.1~17^2~5 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/dece59a25d1063c22e01533af203e869f716de60 11220: Don't bother trying to read manifests from Keep. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 7f78365fb5..7b598fb2a3 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -386,6 +386,14 @@ class CollectionWriter(CollectionBase): ret += locators return ret + def save_new(self, name=None): + return self._api_client.collections().create( + ensure_unique_name=True, + body={ + 'name': name, + 'manifest_text': self.manifest_text(), + }).execute(num_retries=self.num_retries) + class ResumableCollectionWriter(CollectionWriter): STATE_PROPS = ['_current_stream_files', '_current_stream_length', @@ -1325,65 +1333,25 @@ class Collection(RichCollectionBase): # it. If instantiation fails, we'll fall back to the except # clause, just like any other Collection lookup # failure. Return an exception, or None if successful. - try: - self._remember_api_response(self._my_api().collections().get( - uuid=self._manifest_locator).execute( - num_retries=self.num_retries)) - self._manifest_text = self._api_response['manifest_text'] - self._portable_data_hash = self._api_response['portable_data_hash'] - # If not overriden via kwargs, we should try to load the - # replication_desired from the API server - if self.replication_desired is None: - self.replication_desired = self._api_response.get('replication_desired', None) - return None - except Exception as e: - return e - - def _populate_from_keep(self): - # Retrieve a manifest directly from Keep. This has a chance of - # working if [a] the locator includes a permission signature - # or [b] the Keep services are operating in world-readable - # mode. Return an exception, or None if successful. - try: - self._manifest_text = self._my_keep().get( - self._manifest_locator, num_retries=self.num_retries).decode() - except Exception as e: - return e + self._remember_api_response(self._my_api().collections().get( + uuid=self._manifest_locator).execute( + num_retries=self.num_retries)) + self._manifest_text = self._api_response['manifest_text'] + self._portable_data_hash = self._api_response['portable_data_hash'] + # If not overriden via kwargs, we should try to load the + # replication_desired from the API server + if self.replication_desired is None: + self.replication_desired = self._api_response.get('replication_desired', None) def _populate(self): - if self._manifest_locator is None and self._manifest_text is None: - return - error_via_api = None - error_via_keep = None - should_try_keep = ((self._manifest_text is None) and - arvados.util.keep_locator_pattern.match( - self._manifest_locator)) - if ((self._manifest_text is None) and - arvados.util.signed_locator_pattern.match(self._manifest_locator)): - error_via_keep = self._populate_from_keep() if self._manifest_text is None: - error_via_api = self._populate_from_api_server() - if error_via_api is not None and not should_try_keep: - raise error_via_api - if ((self._manifest_text is None) and - not error_via_keep and - should_try_keep): - # Looks like a keep locator, and we didn't already try keep above - error_via_keep = self._populate_from_keep() - if self._manifest_text is None: - # Nothing worked! - raise errors.NotFoundError( - ("Failed to retrieve collection '{}' " + - "from either API server ({}) or Keep ({})." - ).format( - self._manifest_locator, - error_via_api, - error_via_keep)) - # populate + if self._manifest_locator is None: + return + else: + self._populate_from_api_server() self._baseline_manifest = self._manifest_text self._import_manifest(self._manifest_text) - def _has_collection_uuid(self): return self._manifest_locator is not None and re.match(arvados.util.collection_uuid_pattern, self._manifest_locator) diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py index 5f40bb2ec2..4e777bdf5f 100644 --- a/sdk/python/tests/test_collections.py +++ b/sdk/python/tests/test_collections.py @@ -36,7 +36,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, @classmethod def setUpClass(cls): super(ArvadosCollectionsTest, cls).setUpClass() - run_test_server.authorize_with('active') + # need admin privileges to make collections with unsigned blocks + run_test_server.authorize_with('admin') cls.api_client = arvados.api('v1') cls.keep_client = arvados.KeepClient(api_client=cls.api_client, local_store=cls.local_store) @@ -58,7 +59,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" + "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n", "wrong manifest: got {}".format(cw.manifest_text())) - cw.finish() + cw.save_new() return cw.portable_data_hash() def test_pdh_is_native_str(self): @@ -505,15 +506,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, self.assertRaises(arvados.errors.AssertionError, cwriter.write, "badtext") - def test_read_arbitrary_data_with_collection_reader(self): - # arv-get relies on this to do "arv-get {keep-locator} -". - self.write_foo_bar_baz() - self.assertEqual( - 'foobar', - arvados.CollectionReader( - '3858f62230ac3c915f300c664312c63f+6' - ).manifest_text()) - class CollectionTestMixin(tutil.ApiClientMock): API_COLLECTIONS = run_test_server.fixture('collections') @@ -569,34 +561,13 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): api_client=client) self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text()) - def test_locator_init_fallback_to_keep(self): - # crunch-job needs this to read manifests that have only ever - # been written to Keep. - client = self.api_client_mock(200) - self.mock_get_collection(client, 404, None) - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH, - api_client=client) - self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text()) - - def test_uuid_init_no_fallback_to_keep(self): - # Do not look up a collection UUID in Keep. - client = self.api_client_mock(404) - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - with self.assertRaises(arvados.errors.ApiError): - reader = arvados.CollectionReader(self.DEFAULT_UUID, - api_client=client) - - def test_try_keep_first_if_permission_hint(self): - # To verify that CollectionReader tries Keep first here, we - # mock API server to return the wrong data. - client = self.api_client_mock(200) - with tutil.mock_keep_responses(self.ALT_MANIFEST, 200): - self.assertEqual( - self.ALT_MANIFEST, - arvados.CollectionReader( - self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98', - api_client=client).manifest_text()) + def test_init_no_fallback_to_keep(self): + # Do not look up a collection UUID or PDH in Keep. + for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]: + client = self.api_client_mock(404) + with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): + with self.assertRaises(arvados.errors.ApiError): + reader = arvados.CollectionReader(key, api_client=client) def test_init_num_retries_propagated(self): # More of an integration test... @@ -642,15 +613,6 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client) self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response()) - def test_api_response_with_collection_from_keep(self): - client = self.api_client_mock() - self.mock_get_collection(client, 404, 'foo') - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH, - api_client=client) - api_response = reader.api_response() - self.assertIsNone(api_response) - def check_open_file(self, coll_file, stream_name, file_name, file_size): self.assertFalse(coll_file.closed, "returned file is not open") self.assertEqual(stream_name, coll_file.stream_name())