11220: Don't bother trying to read manifests from Keep.
authorTom Clegg <tclegg@veritasgenetics.com>
Fri, 20 Oct 2017 02:30:35 +0000 (22:30 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 20 Oct 2017 02:30:35 +0000 (22:30 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

sdk/python/arvados/collection.py
sdk/python/tests/test_collections.py

index 7f78365fb5b837a432ab6d33a8fdf21a0dcdbe39..7b598fb2a3347ced02afe78e06206c343be08abb 100644 (file)
@@ -386,6 +386,14 @@ class CollectionWriter(CollectionBase):
             ret += locators
         return ret
 
+    def save_new(self, name=None):
+        return self._api_client.collections().create(
+            ensure_unique_name=True,
+            body={
+                'name': name,
+                'manifest_text': self.manifest_text(),
+            }).execute(num_retries=self.num_retries)
+
 
 class ResumableCollectionWriter(CollectionWriter):
     STATE_PROPS = ['_current_stream_files', '_current_stream_length',
@@ -1325,65 +1333,25 @@ class Collection(RichCollectionBase):
         # it.  If instantiation fails, we'll fall back to the except
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
-        try:
-            self._remember_api_response(self._my_api().collections().get(
-                uuid=self._manifest_locator).execute(
-                    num_retries=self.num_retries))
-            self._manifest_text = self._api_response['manifest_text']
-            self._portable_data_hash = self._api_response['portable_data_hash']
-            # If not overriden via kwargs, we should try to load the
-            # replication_desired from the API server
-            if self.replication_desired is None:
-                self.replication_desired = self._api_response.get('replication_desired', None)
-            return None
-        except Exception as e:
-            return e
-
-    def _populate_from_keep(self):
-        # Retrieve a manifest directly from Keep. This has a chance of
-        # working if [a] the locator includes a permission signature
-        # or [b] the Keep services are operating in world-readable
-        # mode. Return an exception, or None if successful.
-        try:
-            self._manifest_text = self._my_keep().get(
-                self._manifest_locator, num_retries=self.num_retries).decode()
-        except Exception as e:
-            return e
+        self._remember_api_response(self._my_api().collections().get(
+            uuid=self._manifest_locator).execute(
+                num_retries=self.num_retries))
+        self._manifest_text = self._api_response['manifest_text']
+        self._portable_data_hash = self._api_response['portable_data_hash']
+        # If not overriden via kwargs, we should try to load the
+        # replication_desired from the API server
+        if self.replication_desired is None:
+            self.replication_desired = self._api_response.get('replication_desired', None)
 
     def _populate(self):
-        if self._manifest_locator is None and self._manifest_text is None:
-            return
-        error_via_api = None
-        error_via_keep = None
-        should_try_keep = ((self._manifest_text is None) and
-                           arvados.util.keep_locator_pattern.match(
-                               self._manifest_locator))
-        if ((self._manifest_text is None) and
-            arvados.util.signed_locator_pattern.match(self._manifest_locator)):
-            error_via_keep = self._populate_from_keep()
         if self._manifest_text is None:
-            error_via_api = self._populate_from_api_server()
-            if error_via_api is not None and not should_try_keep:
-                raise error_via_api
-        if ((self._manifest_text is None) and
-            not error_via_keep and
-            should_try_keep):
-            # Looks like a keep locator, and we didn't already try keep above
-            error_via_keep = self._populate_from_keep()
-        if self._manifest_text is None:
-            # Nothing worked!
-            raise errors.NotFoundError(
-                ("Failed to retrieve collection '{}' " +
-                 "from either API server ({}) or Keep ({})."
-                 ).format(
-                    self._manifest_locator,
-                    error_via_api,
-                    error_via_keep))
-        # populate
+            if self._manifest_locator is None:
+                return
+            else:
+                self._populate_from_api_server()
         self._baseline_manifest = self._manifest_text
         self._import_manifest(self._manifest_text)
 
-
     def _has_collection_uuid(self):
         return self._manifest_locator is not None and re.match(arvados.util.collection_uuid_pattern, self._manifest_locator)
 
index 5f40bb2ec2f2f2b086a48274c1bdf25506ac3d76..4e777bdf5f94c12992bcb9eb8bf61d264fbce364 100644 (file)
@@ -36,7 +36,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
     @classmethod
     def setUpClass(cls):
         super(ArvadosCollectionsTest, cls).setUpClass()
-        run_test_server.authorize_with('active')
+        # need admin privileges to make collections with unsigned blocks
+        run_test_server.authorize_with('admin')
         cls.api_client = arvados.api('v1')
         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                              local_store=cls.local_store)
@@ -58,7 +59,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
                          "wrong manifest: got {}".format(cw.manifest_text()))
-        cw.finish()
+        cw.save_new()
         return cw.portable_data_hash()
 
     def test_pdh_is_native_str(self):
@@ -505,15 +506,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertRaises(arvados.errors.AssertionError,
                           cwriter.write, "badtext")
 
-    def test_read_arbitrary_data_with_collection_reader(self):
-        # arv-get relies on this to do "arv-get {keep-locator} -".
-        self.write_foo_bar_baz()
-        self.assertEqual(
-            'foobar',
-            arvados.CollectionReader(
-                '3858f62230ac3c915f300c664312c63f+6'
-                ).manifest_text())
-
 
 class CollectionTestMixin(tutil.ApiClientMock):
     API_COLLECTIONS = run_test_server.fixture('collections')
@@ -569,34 +561,13 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
 
-    def test_locator_init_fallback_to_keep(self):
-        # crunch-job needs this to read manifests that have only ever
-        # been written to Keep.
-        client = self.api_client_mock(200)
-        self.mock_get_collection(client, 404, None)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
-
-    def test_uuid_init_no_fallback_to_keep(self):
-        # Do not look up a collection UUID in Keep.
-        client = self.api_client_mock(404)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            with self.assertRaises(arvados.errors.ApiError):
-                reader = arvados.CollectionReader(self.DEFAULT_UUID,
-                                                  api_client=client)
-
-    def test_try_keep_first_if_permission_hint(self):
-        # To verify that CollectionReader tries Keep first here, we
-        # mock API server to return the wrong data.
-        client = self.api_client_mock(200)
-        with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
-            self.assertEqual(
-                self.ALT_MANIFEST,
-                arvados.CollectionReader(
-                    self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98',
-                    api_client=client).manifest_text())
+    def test_init_no_fallback_to_keep(self):
+        # Do not look up a collection UUID or PDH in Keep.
+        for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
+            client = self.api_client_mock(404)
+            with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
+                with self.assertRaises(arvados.errors.ApiError):
+                    reader = arvados.CollectionReader(key, api_client=client)
 
     def test_init_num_retries_propagated(self):
         # More of an integration test...
@@ -642,15 +613,6 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
 
-    def test_api_response_with_collection_from_keep(self):
-        client = self.api_client_mock()
-        self.mock_get_collection(client, 404, 'foo')
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            api_response = reader.api_response()
-        self.assertIsNone(api_response)
-
     def check_open_file(self, coll_file, stream_name, file_name, file_size):
         self.assertFalse(coll_file.closed, "returned file is not open")
         self.assertEqual(stream_name, coll_file.stream_name())