X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7317196f547cf8c5e0ec87d1526322c305faba30..bef00883acf5a11705e0c2f597a4af67a9ba71ce:/sdk/python/tests/test_collections.py diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py index de01006741..65b89056bb 100644 --- a/sdk/python/tests/test_collections.py +++ b/sdk/python/tests/test_collections.py @@ -7,22 +7,23 @@ from __future__ import absolute_import from builtins import object import arvados import copy -import mock import os -import pprint import random import re import sys -import tempfile import datetime import ciso8601 import time import unittest +import parameterized + +from unittest import mock from . import run_test_server from arvados._ranges import Range, LocatorAndRange from arvados.collection import Collection, CollectionReader from . import arvados_testutil as tutil +from .arvados_testutil import make_block_cache class TestResumableWriter(arvados.ResumableCollectionWriter): KEEP_BLOCK_SIZE = 1024 # PUT to Keep every 1K. @@ -30,9 +31,10 @@ class TestResumableWriter(arvados.ResumableCollectionWriter): def current_state(self): return self.dump_state(copy.deepcopy) - +@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}]) class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, tutil.ArvadosBaseTestCase): + disk_cache = False MAIN_SERVER = {} @classmethod @@ -42,7 +44,8 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, run_test_server.authorize_with('admin') cls.api_client = arvados.api('v1') cls.keep_client = arvados.KeepClient(api_client=cls.api_client, - local_store=cls.local_store) + local_store=cls.local_store, + block_cache=make_block_cache(cls.disk_cache)) def write_foo_bar_baz(self): cw = arvados.CollectionWriter(self.api_client) @@ -321,8 +324,9 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers, class MockKeep(object): def __init__(self, content, num_retries=0): self.content = content + self.num_prefetch_threads = 1 - def get(self, locator, num_retries=0): + def get(self, locator, num_retries=0, prefetch=False): return self.content[locator] def test_stream_reader(self): @@ -536,11 +540,11 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): self.mock_get_collection(client, status, 'foo_file') return client - def test_init_no_default_retries(self): + def test_init_default_retries(self): client = self.api_client_mock(200) reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client) reader.manifest_text() - client.collections().get().execute.assert_called_with(num_retries=0) + client.collections().get().execute.assert_called_with(num_retries=10) def test_uuid_init_success(self): client = self.api_client_mock(200) @@ -590,7 +594,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): # Ensure stripped_manifest() doesn't mangle our manifest in # any way other than stripping hints. self.assertEqual( - re.sub('\+[^\d\s\+]+', '', nonnormal), + re.sub(r'\+[^\d\s\+]+', '', nonnormal), reader.stripped_manifest()) # Ensure stripped_manifest() didn't mutate our reader. self.assertEqual(nonnormal, reader.manifest_text()) @@ -898,7 +902,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): c1.save_new() loc = c1.manifest_locator() c2 = Collection(loc) - self.assertEqual(c1.manifest_text, c2.manifest_text) + self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True)) self.assertEqual(c1.replication_desired, c2.replication_desired) def test_replication_desired_not_loaded_if_provided(self): @@ -907,9 +911,40 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): c1.save_new() loc = c1.manifest_locator() c2 = Collection(loc, replication_desired=2) - self.assertEqual(c1.manifest_text, c2.manifest_text) + self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True)) self.assertNotEqual(c1.replication_desired, c2.replication_desired) + def test_storage_classes_desired_kept_on_load(self): + m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n' + c1 = Collection(m, storage_classes_desired=['archival']) + c1.save_new() + loc = c1.manifest_locator() + c2 = Collection(loc) + self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True)) + self.assertEqual(c1.storage_classes_desired(), c2.storage_classes_desired()) + + def test_storage_classes_change_after_save(self): + m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n' + c1 = Collection(m, storage_classes_desired=['archival']) + c1.save_new() + loc = c1.manifest_locator() + c2 = Collection(loc) + self.assertEqual(['archival'], c2.storage_classes_desired()) + c2.save(storage_classes=['highIO']) + self.assertEqual(['highIO'], c2.storage_classes_desired()) + c3 = Collection(loc) + self.assertEqual(c1.manifest_text(strip=True), c3.manifest_text(strip=True)) + self.assertEqual(['highIO'], c3.storage_classes_desired()) + + def test_storage_classes_desired_not_loaded_if_provided(self): + m = '. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n' + c1 = Collection(m, storage_classes_desired=['archival']) + c1.save_new() + loc = c1.manifest_locator() + c2 = Collection(loc, storage_classes_desired=['default']) + self.assertEqual(c1.manifest_text(strip=True), c2.manifest_text(strip=True)) + self.assertNotEqual(c1.storage_classes_desired(), c2.storage_classes_desired()) + def test_init_manifest(self): m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt @@ -940,6 +975,20 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): with self.assertRaises(arvados.errors.ArgumentError): c.remove("") + def test_remove_recursive(self): + c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:a/b/c/d/efg.txt 0:10:xyz.txt\n') + self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a/b/c/d 781e5e245d69b566979b86e28d23f2c7+10 0:10:efg.txt\n", c.portable_manifest_text()) + self.assertIn("a", c) + self.assertEqual(1, len(c["a"].keys())) + # cannot remove non-empty directory with default recursive=False + with self.assertRaises(OSError): + c.remove("a/b") + with self.assertRaises(OSError): + c.remove("a/b/c/d") + c.remove("a/b", recursive=True) + self.assertEqual(0, len(c["a"].keys())) + self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:xyz.txt\n./a d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text()) + def test_find(self): c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n') self.assertIs(c.find("."), c) @@ -952,10 +1001,49 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): self.assertIs(c.find("./nonexistant.txt"), None) self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None) + def test_escaped_paths_dont_get_unescaped_on_manifest(self): + # Dir & file names are literally '\056' (escaped form: \134056) + manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n' + c = Collection(manifest) + self.assertEqual(c.portable_manifest_text(), manifest) + + def test_other_special_chars_on_file_token(self): + cases = [ + ('\\000', '\0'), + ('\\011', '\t'), + ('\\012', '\n'), + ('\\072', ':'), + ('\\134400', '\\400'), + ] + for encoded, decoded in cases: + manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded + c = Collection(manifest) + self.assertEqual(c.portable_manifest_text(), manifest) + self.assertIn('some%sfile.txt' % decoded, c.keys()) + + def test_escaped_paths_do_get_unescaped_on_listing(self): + # Dir & file names are literally '\056' (escaped form: \134056) + manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n' + c = Collection(manifest) + self.assertIn('\\056 Test', c.keys()) + self.assertIn('\\056', c['\\056 Test'].keys()) + + def test_make_empty_dir_with_escaped_chars(self): + c = Collection() + c.mkdirs('./Empty\\056Dir') + self.assertEqual(c.portable_manifest_text(), + './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n') + + def test_make_empty_dir_with_spaces(self): + c = Collection() + c.mkdirs('./foo bar/baz waz') + self.assertEqual(c.portable_manifest_text(), + './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n') + def test_remove_in_subdir(self): c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') c.remove("foo/count2.txt") - self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text()) + self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text()) def test_remove_empty_subdir(self): c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') @@ -1209,6 +1297,16 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ def setUp(self): self.keep_put = getattr(arvados.keep.KeepClient, 'put') + @mock.patch('arvados.keep.KeepClient.put', autospec=True) + def test_storage_classes_desired(self, put_mock): + put_mock.side_effect = self.keep_put + c = Collection(storage_classes_desired=['default']) + with c.open("file.txt", 'wb') as f: + f.write('content') + c.save_new() + _, kwargs = put_mock.call_args + self.assertEqual(['default'], kwargs['classes']) + @mock.patch('arvados.keep.KeepClient.put', autospec=True) def test_repacked_block_submission_get_permission_token(self, mocked_put): ''' @@ -1282,6 +1380,25 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers): + def test_preserve_version_on_save(self): + c = Collection() + c.save_new(preserve_version=True) + coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute() + self.assertEqual(coll_record['version'], 1) + self.assertEqual(coll_record['preserve_version'], True) + with c.open("foo.txt", "wb") as foo: + foo.write(b"foo") + c.save(preserve_version=True) + coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute() + self.assertEqual(coll_record['version'], 2) + self.assertEqual(coll_record['preserve_version'], True) + with c.open("bar.txt", "wb") as foo: + foo.write(b"bar") + c.save(preserve_version=False) + coll_record = arvados.api().collections().get(uuid=c.manifest_locator()).execute() + self.assertEqual(coll_record['version'], 3) + self.assertEqual(coll_record['preserve_version'], False) + def test_get_manifest_text_only_committed(self): c = Collection() with c.open("count.txt", "wb") as f: