X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/31d31c010bb6b5170e3962fdd50c6d393cfe6076..2aaef36d5b642e8e2b0bcd9bdca0a25217e49590:/sdk/python/tests/test_collections.py

diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 86215f535a..cfc3665f42 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -6,9 +6,11 @@ import copy
 import mock
 import os
 import pprint
+import random
 import re
 import sys
 import tempfile
+import time
 import unittest
 
 from . import run_test_server
@@ -55,6 +57,10 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         cw.finish()
         return cw.portable_data_hash()
 
+    def test_pdh_is_native_str(self):
+        pdh = self.write_foo_bar_baz()
+        self.assertEqual(type(''), type(pdh))
+
     def test_keep_local_store(self):
         self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
         self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
@@ -804,12 +810,35 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertRaises(arvados.errors.AssertionError, writer.open, 'two')
 
 
+class CollectionMethods(run_test_server.TestCaseWithServers):
+
+    def test_keys_values_items_support_indexing(self):
+        c = Collection()
+        with c.open('foo', 'wb') as f:
+            f.write(b'foo')
+        with c.open('bar', 'wb') as f:
+            f.write(b'bar')
+        self.assertEqual(2, len(c.keys()))
+        if sys.version_info < (3, 0):
+            # keys() supports indexing only for python2 callers.
+            fn0 = c.keys()[0]
+            fn1 = c.keys()[1]
+        else:
+            fn0, fn1 = c.keys()
+        self.assertEqual(2, len(c.values()))
+        f0 = c.values()[0]
+        f1 = c.values()[1]
+        self.assertEqual(2, len(c.items()))
+        self.assertEqual(fn0, c.items()[0][0])
+        self.assertEqual(fn1, c.items()[1][0])
+
+
 class CollectionOpenModes(run_test_server.TestCaseWithServers):
 
     def test_open_binary_modes(self):
         c = Collection()
         for mode in ['wb', 'wb+', 'ab', 'ab+']:
-            with c.open('foo', 'wb') as f:
+            with c.open('foo', mode) as f:
                 f.write(b'foo')
 
     def test_open_invalid_modes(self):
@@ -1146,6 +1175,38 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(c1["count1.txt"].size(), 0)
 
 
+class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServers):
+    MAIN_SERVER = {}
+    KEEP_SERVER = {}
+
+    def setUp(self):
+        self.keep_put = getattr(arvados.keep.KeepClient, 'put')
+
+    def test_repacked_block_sumbmission_get_permission_token(self):
+        '''
+        Make sure that those blocks that are committed after repacking small ones,
+        get their permission tokens assigned on the collection manifest.
+        '''
+        def wrapped_keep_put(*args, **kwargs):
+            # Simulate slow put operations
+            time.sleep(1)
+            return self.keep_put(*args, **kwargs)
+
+        re_locator = "[0-9a-f]{32}\+\d+\+A[a-f0-9]{40}@[a-f0-9]{8}"
+
+        with mock.patch('arvados.keep.KeepClient.put', autospec=True) as mocked_put:
+            mocked_put.side_effect = wrapped_keep_put
+            c = Collection()
+            # Write 70 files ~1MiB each so we force to produce 1 big block by repacking
+            # small ones before finishing the upload.
+            for i in range(70):
+                f = c.open("file_{}.txt".format(i), 'wb')
+                f.write(random.choice('abcdefghijklmnopqrstuvwxyz') * (2**20+i))
+                f.close(flush=False)
+            # We should get 2 blocks with their tokens
+            self.assertEqual(len(re.findall(re_locator, c.manifest_text())), 2)
+
+
 class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
     def test_get_manifest_text_only_committed(self):
         c = Collection()
@@ -1173,7 +1234,7 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
 
     def test_only_small_blocks_are_packed_together(self):
         c = Collection()
-        # Write a couple of small files, 
+        # Write a couple of small files,
         f = c.open("count.txt", "wb")
         f.write(b"0123456789")
         f.close(flush=False)
@@ -1188,6 +1249,64 @@ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
             c.manifest_text("."),
             '. 2d303c138c118af809f39319e5d507e9+34603008 a8430a058b8fbf408e1931b794dbd6fb+13 0:34603008:bigfile.txt 34603008:10:count.txt 34603018:3:foo.txt\n')
 
+    def test_flush_after_small_block_packing(self):
+        c = Collection()
+        # Write a couple of small files,
+        f = c.open("count.txt", "wb")
+        f.write(b"0123456789")
+        f.close(flush=False)
+        foo = c.open("foo.txt", "wb")
+        foo.write(b"foo")
+        foo.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+        f = c.open("count.txt", "rb+")
+        f.close(flush=True)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+    def test_write_after_small_block_packing2(self):
+        c = Collection()
+        # Write a couple of small files,
+        f = c.open("count.txt", "wb")
+        f.write(b"0123456789")
+        f.close(flush=False)
+        foo = c.open("foo.txt", "wb")
+        foo.write(b"foo")
+        foo.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. a8430a058b8fbf408e1931b794dbd6fb+13 0:10:count.txt 10:3:foo.txt\n')
+
+        f = c.open("count.txt", "rb+")
+        f.write(b"abc")
+        f.close(flush=False)
+
+        self.assertEqual(
+            c.manifest_text(),
+            '. 900150983cd24fb0d6963f7d28e17f72+3 a8430a058b8fbf408e1931b794dbd6fb+13 0:3:count.txt 6:7:count.txt 13:3:foo.txt\n')
+
+
+    def test_small_block_packing_with_overwrite(self):
+        c = Collection()
+        c.open("b1", "wb").close()
+        c["b1"].writeto(0, b"b1", 0)
+
+        c.open("b2", "wb").close()
+        c["b2"].writeto(0, b"b2", 0)
+
+        c["b1"].writeto(0, b"1b", 0)
+
+        self.assertEquals(c.manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1 2:2:b2\n")
+        self.assertEquals(c["b1"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 0:2:b1\n")
+        self.assertEquals(c["b2"].manifest_text(), ". ed4f3f67c70b02b29c50ce1ea26666bd+4 2:2:b2\n")
+
 
 class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
@@ -1284,6 +1403,11 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
             c1.manifest_text(),
             r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
+    def test_pdh_is_native_str(self):
+        c1 = self.create_count_txt()
+        pdh = c1.portable_data_hash()
+        self.assertEqual(type(''), type(pdh))
+
 
 if __name__ == '__main__':
     unittest.main()