X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/249b3abab7da1abd531195ed0cd58760bf10774b..e67d0f5d43c56f78694ea4a5f93acec5c93cd0fb:/sdk/python/tests/test_collections.py?ds=sidebyside diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py index da44ee2839..49c00191be 100644 --- a/sdk/python/tests/test_collections.py +++ b/sdk/python/tests/test_collections.py @@ -1,7 +1,8 @@ -from __future__ import absolute_import -# usage example: +# Copyright (C) The Arvados Authors. All rights reserved. # -# ARVADOS_API_TOKEN=abc ARVADOS_API_HOST=arvados.local python -m unittest discover +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import absolute_import from builtins import object import arvados @@ -9,9 +10,11 @@ import copy import mock import os import pprint +import random import re import sys import tempfile +import time import unittest from . import run_test_server @@ -33,7 +36,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, @classmethod def setUpClass(cls): super(ArvadosCollectionsTest, cls).setUpClass() - run_test_server.authorize_with('active') + # need admin privileges to make collections with unsigned blocks + run_test_server.authorize_with('admin') cls.api_client = arvados.api('v1') cls.keep_client = arvados.KeepClient(api_client=cls.api_client, local_store=cls.local_store) @@ -55,9 +59,13 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, ". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" + "./baz 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n", "wrong manifest: got {}".format(cw.manifest_text())) - cw.finish() + cw.save_new() return cw.portable_data_hash() + def test_pdh_is_native_str(self): + pdh = self.write_foo_bar_baz() + self.assertEqual(type(''), type(pdh)) + def test_keep_local_store(self): self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put') self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get') @@ -90,6 +98,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, self.assertEqual(stream0.readfrom(2**26, 0), b'', 'reading zero bytes should have returned empty string') + self.assertEqual(3, len(cr)) + self.assertTrue(cr) def _test_subset(self, collection, expected): cr = arvados.CollectionReader(collection, self.api_client) @@ -496,15 +506,6 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, self.assertRaises(arvados.errors.AssertionError, cwriter.write, "badtext") - def test_read_arbitrary_data_with_collection_reader(self): - # arv-get relies on this to do "arv-get {keep-locator} -". - self.write_foo_bar_baz() - self.assertEqual( - 'foobar', - arvados.CollectionReader( - '3858f62230ac3c915f300c664312c63f+6' - ).manifest_text()) - class CollectionTestMixin(tutil.ApiClientMock): API_COLLECTIONS = run_test_server.fixture('collections') @@ -560,34 +561,13 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): api_client=client) self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text()) - def test_locator_init_fallback_to_keep(self): - # crunch-job needs this to read manifests that have only ever - # been written to Keep. - client = self.api_client_mock(200) - self.mock_get_collection(client, 404, None) - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH, - api_client=client) - self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text()) - - def test_uuid_init_no_fallback_to_keep(self): - # Do not look up a collection UUID in Keep. - client = self.api_client_mock(404) - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - with self.assertRaises(arvados.errors.ApiError): - reader = arvados.CollectionReader(self.DEFAULT_UUID, - api_client=client) - - def test_try_keep_first_if_permission_hint(self): - # To verify that CollectionReader tries Keep first here, we - # mock API server to return the wrong data. - client = self.api_client_mock(200) - with tutil.mock_keep_responses(self.ALT_MANIFEST, 200): - self.assertEqual( - self.ALT_MANIFEST, - arvados.CollectionReader( - self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff@fedcba98', - api_client=client).manifest_text()) + def test_init_no_fallback_to_keep(self): + # Do not look up a collection UUID or PDH in Keep. + for key in [self.DEFAULT_UUID, self.DEFAULT_DATA_HASH]: + client = self.api_client_mock(404) + with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): + with self.assertRaises(arvados.errors.ApiError): + reader = arvados.CollectionReader(key, api_client=client) def test_init_num_retries_propagated(self): # More of an integration test... @@ -625,21 +605,14 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): reader = arvados.CollectionReader('d41d8cd98f00b204e9800998ecf8427e+0', api_client=client) self.assertEqual('', reader.manifest_text()) + self.assertEqual(0, len(reader)) + self.assertFalse(reader) def test_api_response(self): client = self.api_client_mock() reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client) self.assertEqual(self.DEFAULT_COLLECTION, reader.api_response()) - def test_api_response_with_collection_from_keep(self): - client = self.api_client_mock() - self.mock_get_collection(client, 404, 'foo') - with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200): - reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH, - api_client=client) - api_response = reader.api_response() - self.assertIsNone(api_response) - def check_open_file(self, coll_file, stream_name, file_name, file_size): self.assertFalse(coll_file.closed, "returned file is not open") self.assertEqual(stream_name, coll_file.stream_name()) @@ -807,12 +780,35 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin): self.assertRaises(arvados.errors.AssertionError, writer.open, 'two') +class CollectionMethods(run_test_server.TestCaseWithServers): + + def test_keys_values_items_support_indexing(self): + c = Collection() + with c.open('foo', 'wb') as f: + f.write(b'foo') + with c.open('bar', 'wb') as f: + f.write(b'bar') + self.assertEqual(2, len(c.keys())) + if sys.version_info < (3, 0): + # keys() supports indexing only for python2 callers. + fn0 = c.keys()[0] + fn1 = c.keys()[1] + else: + fn0, fn1 = c.keys() + self.assertEqual(2, len(c.values())) + f0 = c.values()[0] + f1 = c.values()[1] + self.assertEqual(2, len(c.items())) + self.assertEqual(fn0, c.items()[0][0]) + self.assertEqual(fn1, c.items()[1][0]) + + class CollectionOpenModes(run_test_server.TestCaseWithServers): def test_open_binary_modes(self): c = Collection() for mode in ['wb', 'wb+', 'ab', 'ab+']: - with c.open('foo', 'wb') as f: + with c.open('foo', mode) as f: f.write(b'foo') def test_open_invalid_modes(self): @@ -1149,6 +1145,38 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): self.assertEqual(c1["count1.txt"].size(), 0) +class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers): + MAIN_SERVER = {} + KEEP_SERVER = {} + + def setUp(self): + self.keep_put = getattr(arvados.keep.KeepClient, 'put') + + def test_repacked_block_submission_get_permission_token(self): + ''' + Make sure that those blocks that are committed after repacking small ones, + get their permission tokens assigned on the collection manifest. + ''' + def wrapped_keep_put(*args, **kwargs): + # Simulate slow put operations + time.sleep(1) + return self.keep_put(*args, **kwargs) + + re_locator = "[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}" + + with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put: + mocked_put.side_effect = wrapped_keep_put + c = Collection() + # Write 70 files ~1MiB each so we force to produce 1 big block by repacking + # small ones before finishing the upload. + for i in range(70): + f = c.open("file_{}.txt".format(i), 'wb') + f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i)) + f.close(flush=False) + # We should get 2 blocks with their tokens + self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2) + + class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers): def test_get_manifest_text_only_committed(self): c = Collection() @@ -1176,7 +1204,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers): def test_only_small_blocks_are_packed_together(self): c = Collection() - # Write a couple of small files, + # Write a couple of small files, f = c.open("count.txt", "wb") f.write(b"0123456789") f.close(flush=False) @@ -1191,6 +1219,64 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers): c.manifest_text("."), '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n') + def test_flush_after_small_block_packing(self): + c = Collection() + # Write a couple of small files, + f = c.open("count.txt", "wb") + f.write(b"0123456789") + f.close(flush=False) + foo = c.open("foo.txt", "wb") + foo.write(b"foo") + foo.close(flush=False) + + self.assertEqual( + c.manifest_text(), + '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n') + + f = c.open("count.txt", "rb+") + f.close(flush=True) + + self.assertEqual( + c.manifest_text(), + '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n') + + def test_write_after_small_block_packing2(self): + c = Collection() + # Write a couple of small files, + f = c.open("count.txt", "wb") + f.write(b"0123456789") + f.close(flush=False) + foo = c.open("foo.txt", "wb") + foo.write(b"foo") + foo.close(flush=False) + + self.assertEqual( + c.manifest_text(), + '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n') + + f = c.open("count.txt", "rb+") + f.write(b"abc") + f.close(flush=False) + + self.assertEqual( + c.manifest_text(), + '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n') + + + def test_small_block_packing_with_overwrite(self): + c = Collection() + c.open("b1", "wb").close() + c["b1"].writeto(0, b"b1", 0) + + c.open("b2", "wb").close() + c["b2"].writeto(0, b"b2", 0) + + c["b1"].writeto(0, b"1b", 0) + + self.assertEqual(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n") + self.assertEqual(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n") + self.assertEqual(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n") + class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers): MAIN_SERVER = {} @@ -1287,6 +1373,11 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers): c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$") + def test_pdh_is_native_str(self): + c1 = self.create_count_txt() + pdh = c1.portable_data_hash() + self.assertEqual(type(''), type(pdh)) + if __name__ == '__main__': unittest.main()