14012: Rephrases error message.
[arvados.git] / sdk / python / tests / test_collections.py
index a992328d70778099f7693e6fdf76158db0cfb6a4..ac18c44c6844c2f54fbc54d91acf094778834b3c 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import
 
 from builtins import object
 from __future__ import absolute_import
 
 from builtins import object
@@ -10,6 +14,8 @@ import random
 import re
 import sys
 import tempfile
 import re
 import sys
 import tempfile
+import datetime
+import ciso8601
 import time
 import unittest
 
 import time
 import unittest
 
@@ -32,7 +38,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
     @classmethod
     def setUpClass(cls):
         super(ArvadosCollectionsTest, cls).setUpClass()
     @classmethod
     def setUpClass(cls):
         super(ArvadosCollectionsTest, cls).setUpClass()
-        run_test_server.authorize_with('active')
+        # need admin privileges to make collections with unsigned blocks
+        run_test_server.authorize_with('admin')
         cls.api_client = arvados.api('v1')
         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                              local_store=cls.local_store)
         cls.api_client = arvados.api('v1')
         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                              local_store=cls.local_store)
@@ -54,7 +61,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
                          "wrong manifest: got {}".format(cw.manifest_text()))
                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
                          "wrong manifest: got {}".format(cw.manifest_text()))
-        cw.finish()
+        cw.save_new()
         return cw.portable_data_hash()
 
     def test_pdh_is_native_str(self):
         return cw.portable_data_hash()
 
     def test_pdh_is_native_str(self):
@@ -93,6 +100,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertEqual(stream0.readfrom(2**26, 0),
                          b'',
                          'reading zero bytes should have returned empty string')
         self.assertEqual(stream0.readfrom(2**26, 0),
                          b'',
                          'reading zero bytes should have returned empty string')
+        self.assertEqual(3, len(cr))
+        self.assertTrue(cr)
 
     def _test_subset(self, collection, expected):
         cr = arvados.CollectionReader(collection, self.api_client)
 
     def _test_subset(self, collection, expected):
         cr = arvados.CollectionReader(collection, self.api_client)
@@ -499,15 +508,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertRaises(arvados.errors.AssertionError,
                           cwriter.write, "badtext")
 
         self.assertRaises(arvados.errors.AssertionError,
                           cwriter.write, "badtext")
 
-    def test_read_arbitrary_data_with_collection_reader(self):
-        # arv-get relies on this to do "arv-get {keep-locator} -".
-        self.write_foo_bar_baz()
-        self.assertEqual(
-            'foobar',
-            arvados.CollectionReader(
-                '3858f62230ac3c915f300c664312c63f+6'
-                ).manifest_text())
-
 
 class CollectionTestMixin(tutil.ApiClientMock):
     API_COLLECTIONS = run_test_server.fixture('collections')
 
 class CollectionTestMixin(tutil.ApiClientMock):
     API_COLLECTIONS = run_test_server.fixture('collections')
@@ -563,34 +563,13 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
 
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
 
-    def test_locator_init_fallback_to_keep(self):
-        # crunch-job needs this to read manifests that have only ever
-        # been written to Keep.
-        client = self.api_client_mock(200)
-        self.mock_get_collection(client, 404, None)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
-
-    def test_uuid_init_no_fallback_to_keep(self):
-        # Do not look up a collection UUID in Keep.
-        client = self.api_client_mock(404)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            with self.assertRaises(arvados.errors.ApiError):
-                reader = arvados.CollectionReader(self.DEFAULT_UUID,
-                                                  api_client=client)
-
-    def test_try_keep_first_if_permission_hint(self):
-        # To verify that CollectionReader tries Keep first here, we
-        # mock API server to return the wrong data.
-        client = self.api_client_mock(200)
-        with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
-            self.assertEqual(
-                self.ALT_MANIFEST,
-                arvados.CollectionReader(
-                    self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98',
-                    api_client=client).manifest_text())
+    def test_init_no_fallback_to_keep(self):
+        # Do not look up a collection UUID or PDH in Keep.
+        for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
+            client = self.api_client_mock(404)
+            with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
+                with self.assertRaises(arvados.errors.ApiError):
+                    reader = arvados.CollectionReader(key, api_client=client)
 
     def test_init_num_retries_propagated(self):
         # More of an integration test...
 
     def test_init_num_retries_propagated(self):
         # More of an integration test...
@@ -628,21 +607,14 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
                                           api_client=client)
         self.assertEqual('', reader.manifest_text())
         reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
                                           api_client=client)
         self.assertEqual('', reader.manifest_text())
+        self.assertEqual(0, len(reader))
+        self.assertFalse(reader)
 
     def test_api_response(self):
         client = self.api_client_mock()
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
 
 
     def test_api_response(self):
         client = self.api_client_mock()
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
 
-    def test_api_response_with_collection_from_keep(self):
-        client = self.api_client_mock()
-        self.mock_get_collection(client, 404, 'foo')
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            api_response = reader.api_response()
-        self.assertIsNone(api_response)
-
     def check_open_file(self, coll_file, stream_name, file_name, file_size):
         self.assertFalse(coll_file.closed, "returned file is not open")
         self.assertEqual(stream_name, coll_file.stream_name())
     def check_open_file(self, coll_file, stream_name, file_name, file_size):
         self.assertFalse(coll_file.closed, "returned file is not open")
         self.assertEqual(stream_name, coll_file.stream_name())
@@ -832,6 +804,18 @@ class CollectionMethods(run_test_server.TestCaseWithServers):
         self.assertEqual(fn0, c.items()[0][0])
         self.assertEqual(fn1, c.items()[1][0])
 
         self.assertEqual(fn0, c.items()[0][0])
         self.assertEqual(fn1, c.items()[1][0])
 
+    def test_get_properties(self):
+        c = Collection()
+        self.assertEqual(c.get_properties(), {})
+        c.save_new(properties={"foo":"bar"})
+        self.assertEqual(c.get_properties(), {"foo":"bar"})
+
+    def test_get_trash_at(self):
+        c = Collection()
+        self.assertEqual(c.get_trash_at(), None)
+        c.save_new(trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        self.assertEqual(c.get_trash_at(), ciso8601.parse_datetime('2111-01-01T11:11:11.111111000Z'))
+
 
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
 
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
@@ -1178,11 +1162,14 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
 class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
+    local_locator_re = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
+    remote_locator_re = r"[0-9a-f]{32}\+\d+\+R[a-z]{5}-[a-f0-9]{40}@[a-f0-9]{8}"
 
     def setUp(self):
         self.keep_put = getattr(arvados.keep.KeepClient, 'put')
 
 
     def setUp(self):
         self.keep_put = getattr(arvados.keep.KeepClient, 'put')
 
-    def test_repacked_block_submission_get_permission_token(self):
+    @mock.patch('arvados.keep.KeepClient.put', autospec=True)
+    def test_repacked_block_submission_get_permission_token(self, mocked_put):
         '''
         Make sure that those blocks that are committed after repacking small ones,
         get their permission tokens assigned on the collection manifest.
         '''
         Make sure that those blocks that are committed after repacking small ones,
         get their permission tokens assigned on the collection manifest.
@@ -1192,19 +1179,65 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
             time.sleep(1)
             return self.keep_put(*args, **kwargs)
 
             time.sleep(1)
             return self.keep_put(*args, **kwargs)
 
-        re_locator = "[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
-
-        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
-            mocked_put.side_effect = wrapped_keep_put
-            c = Collection()
-            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
-            # small ones before finishing the upload.
-            for i in range(70):
-                f = c.open("file_{}.txt".format(i), 'wb')
-                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
-                f.close(flush=False)
-            # We should get 2 blocks with their tokens
-            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+        mocked_put.side_effect = wrapped_keep_put
+        c = Collection()
+        # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
+        # small ones before finishing the upload.
+        for i in range(70):
+            f = c.open("file_{}.txt".format(i), 'wb')
+            f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
+            f.close(flush=False)
+        # We should get 2 blocks with their tokens
+        self.assertEqual(len(re.findall(self.local_locator_re, c.manifest_text())), 2)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save_new(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 0)
+        c.save_new()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 0)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 1)
+
+    @mock.patch('arvados.keep.KeepClient.refresh_signature')
+    def test_copy_remote_blocks_on_save(self, rs_mock):
+        remote_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+Remote-" + "a" * 40 + "@abcdef01"
+        local_block_loc = "acbd18db4cc2f85cedef654fccc4a4d8+3+A" + "b" * 40 + "@abcdef01"
+        rs_mock.return_value = local_block_loc
+        # Remote collection
+        remote_c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, remote_c.manifest_text())), 1)
+        # Local collection
+        local_c = Collection()
+        with local_c.open('barfile.txt', 'wb') as f:
+            f.write('bar')
+        local_c.save_new()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
+        # Copy remote file to local collection
+        local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 1)
+        # Save local collection: remote block should be copied
+        local_c.save()
+        rs_mock.assert_called()
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 2)
+        self.assertEqual(
+            len(re.findall(self.remote_locator_re, local_c.manifest_text())), 0)
 
 
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
 
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
@@ -1303,9 +1336,9 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
         c["b1"].writeto(0, b"1b", 0)
 
 
         c["b1"].writeto(0, b"1b", 0)
 
-        self.assertEquals(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
-        self.assertEquals(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
-        self.assertEquals(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
+        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
+        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
+        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
@@ -1330,17 +1363,43 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
     def test_create_and_save(self):
         c = self.create_count_txt()
 
     def test_create_and_save(self):
         c = self.create_count_txt()
-        c.save()
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['archive'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
+
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
-        c.save_new()
+        c.save_new(properties={'type' : 'Intermediate'},
+                   storage_classes=['archive'],
+                   trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Intermediate'})
+        self.assertEqual(c.api_response()["trash_at"], '2111-01-01T11:11:11.111111000Z')
+
+    def test_create_and_save_after_commiting(self):
+        c = self.create_count_txt()
+        c.save(properties={'type' : 'Intermediate'},
+               storage_classes=['hot'],
+               trash_at=datetime.datetime(2111, 1, 1, 11, 11, 11, 111111))
+        c.save(properties={'type' : 'Output'},
+               storage_classes=['cold'],
+               trash_at=datetime.datetime(2222, 2, 2, 22, 22, 22, 222222))
+
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['cold'])
+        self.assertEqual(c.api_response()["properties"], {'type' : 'Output'})
+        self.assertEqual(c.api_response()["trash_at"], '2222-02-02T22:22:22.222222000Z')
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()