X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9456885954b0bff02721bbb14da296be212efdc4..e67d0f5d43c56f78694ea4a5f93acec5c93cd0fb:/sdk/python/tests/test_collections.py

Review notes (free text ahead of the "diff --git" header; not part of any hunk):
  * Patch to sdk/python/tests/test_collections.py.  It adds the license
    header, the `random` and `time` imports, switches the test class to
    the 'admin' token, replaces cw.finish() with cw.save_new(), adds
    len()/truthiness assertions for CollectionReader, drops the tests
    that expected a fallback to Keep, adds
    NewCollectionTestCaseWithServersAndTokens, and renames
    assertEquals -> assertEqual.
  * NOTE(review): the line structure was reconstructed from the hunk
    headers' line counts.  Blank context lines and the indentation of
    continuation lines are inferred -- verify against the upstream
    commit before applying.
  * Payload changes relative to the original patch (same line counts):
      - re_locator is a raw string, so "\+" and "\d" are no longer
        invalid escape sequences in a normal string literal.
      - setUp reads KeepClient.put directly instead of via getattr()
        with a constant name.

diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 77ec7bb0fa..49c00191be 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import
 
 from builtins import object
@@ -6,9 +10,11 @@ import copy
 import mock
 import os
 import pprint
+import random
 import re
 import sys
 import tempfile
+import time
 import unittest
 
 from . import run_test_server
@@ -30,7 +36,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
     @classmethod
     def setUpClass(cls):
         super(ArvadosCollectionsTest, cls).setUpClass()
-        run_test_server.authorize_with('active')
+        # need admin privileges to make collections with unsigned blocks
+        run_test_server.authorize_with('admin')
         cls.api_client = arvados.api('v1')
         cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                              local_store=cls.local_store)
@@ -52,9 +59,13 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
                          ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
                          "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
                          "wrong manifest: got {}".format(cw.manifest_text()))
-        cw.finish()
+        cw.save_new()
         return cw.portable_data_hash()
 
+    def test_pdh_is_native_str(self):
+        pdh = self.write_foo_bar_baz()
+        self.assertEqual(type(''), type(pdh))
+
     def test_keep_local_store(self):
         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
@@ -87,6 +98,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertEqual(stream0.readfrom(2**26, 0),
                          b'',
                          'reading zero bytes should have returned empty string')
+        self.assertEqual(3, len(cr))
+        self.assertTrue(cr)
 
     def _test_subset(self, collection, expected):
         cr = arvados.CollectionReader(collection, self.api_client)
@@ -493,15 +506,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.assertRaises(arvados.errors.AssertionError,
                           cwriter.write, "badtext")
 
-    def test_read_arbitrary_data_with_collection_reader(self):
-        # arv-get relies on this to do "arv-get {keep-locator} -".
-        self.write_foo_bar_baz()
-        self.assertEqual(
-            'foobar',
-            arvados.CollectionReader(
-                '3858f62230ac3c915f300c664312c63f+6'
-                ).manifest_text())
-
 
 class CollectionTestMixin(tutil.ApiClientMock):
     API_COLLECTIONS = run_test_server.fixture('collections')
@@ -557,34 +561,13 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
 
-    def test_locator_init_fallback_to_keep(self):
-        # crunch-job needs this to read manifests that have only ever
-        # been written to Keep.
-        client = self.api_client_mock(200)
-        self.mock_get_collection(client, 404, None)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
-
-    def test_uuid_init_no_fallback_to_keep(self):
-        # Do not look up a collection UUID in Keep.
-        client = self.api_client_mock(404)
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            with self.assertRaises(arvados.errors.ApiError):
-                reader = arvados.CollectionReader(self.DEFAULT_UUID,
-                                                  api_client=client)
-
-    def test_try_keep_first_if_permission_hint(self):
-        # To verify that CollectionReader tries Keep first here, we
-        # mock API server to return the wrong data.
-        client = self.api_client_mock(200)
-        with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
-            self.assertEqual(
-                self.ALT_MANIFEST,
-                arvados.CollectionReader(
-                    self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98',
-                    api_client=client).manifest_text())
+    def test_init_no_fallback_to_keep(self):
+        # Do not look up a collection UUID or PDH in Keep.
+        for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]:
+            client = self.api_client_mock(404)
+            with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
+                with self.assertRaises(arvados.errors.ApiError):
+                    reader = arvados.CollectionReader(key, api_client=client)
 
     def test_init_num_retries_propagated(self):
         # More of an integration test...
@@ -622,21 +605,14 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
             reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0',
                                               api_client=client)
             self.assertEqual('', reader.manifest_text())
+            self.assertEqual(0, len(reader))
+            self.assertFalse(reader)
 
     def test_api_response(self):
         client = self.api_client_mock()
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
         self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response())
 
-    def test_api_response_with_collection_from_keep(self):
-        client = self.api_client_mock()
-        self.mock_get_collection(client, 404, 'foo')
-        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
-            reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
-                                              api_client=client)
-            api_response = reader.api_response()
-            self.assertIsNone(api_response)
-
     def check_open_file(self, coll_file, stream_name, file_name, file_size):
         self.assertFalse(coll_file.closed, "returned file is not open")
         self.assertEqual(stream_name, coll_file.stream_name())
@@ -1169,6 +1145,38 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(c1["count1.txt"].size(), 0)
 
 
+class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
+    MAIN_SERVER = {}
+    KEEP_SERVER = {}
+
+    def setUp(self):
+        self.keep_put = arvados.keep.KeepClient.put
+
+    def test_repacked_block_submission_get_permission_token(self):
+        '''
+        Make sure that those blocks that are committed after repacking small ones,
+        get their permission tokens assigned on the collection manifest.
+        '''
+        def wrapped_keep_put(*args, **kwargs):
+            # Simulate slow put operations
+            time.sleep(1)
+            return self.keep_put(*args, **kwargs)
+
+        re_locator = r"[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
+
+        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
+            mocked_put.side_effect = wrapped_keep_put
+            c = Collection()
+            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
+            # small ones before finishing the upload.
+            for i in range(70):
+                f = c.open("file_{}.txt".format(i), 'wb')
+                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
+                f.close(flush=False)
+            # We should get 2 blocks with their tokens
+            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+
+
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
     def test_get_manifest_text_only_committed(self):
         c = Collection()
@@ -1265,9 +1273,9 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
         c["b1"].writeto(0, b"1b", 0)
 
-        self.assertEquals(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
-        self.assertEquals(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
-        self.assertEquals(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
+        self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
+        self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
+        self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
 
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
@@ -1365,6 +1373,11 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
             c1.manifest_text(),
             r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
+    def test_pdh_is_native_str(self):
+        c1 = self.create_count_txt()
+        pdh = c1.portable_data_hash()
+        self.assertEqual(type(''), type(pdh))
+
 
 if __name__ == '__main__':
     unittest.main()