From cb8320961335a4732c470882fcfcd4e6c581a0d4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 25 Feb 2015 11:00:05 -0500 Subject: [PATCH] 4823: Add arvapi parameter to one_task_per_input_file() to solve mocking problems. Refactor Collection.copy() and add Collection.add(). Fix docstring bugs. --- sdk/python/arvados/__init__.py | 12 +++-- sdk/python/arvados/collection.py | 72 +++++++++++++++++++--------- sdk/python/tests/test_collections.py | 12 +++++ sdk/python/tests/test_sdk.py | 11 +++-- 4 files changed, 75 insertions(+), 32 deletions(-) diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py index 8986e41baa..ed7e6a9764 100644 --- a/sdk/python/arvados/__init__.py +++ b/sdk/python/arvados/__init__.py @@ -83,11 +83,15 @@ class JobTask(object): class job_setup: @staticmethod - def one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=False): + def one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=False, arvapi=None): if if_sequence != current_task()['sequence']: return + + if not arvapi: + arvapi = api('v1') + job_input = current_job()['script_parameters']['input'] - cr = CollectionReader(job_input) + cr = CollectionReader(job_input, api_client=arvapi) cr.normalize() for s in cr.all_streams(): for f in s.all_files(): @@ -103,9 +107,9 @@ class job_setup: 'input':task_input } } - api('v1').job_tasks().create(body=new_task_attrs).execute() + arvapi.job_tasks().create(body=new_task_attrs).execute() if and_end_task: - api('v1').job_tasks().update(uuid=current_task()['uuid'], + arvapi.job_tasks().update(uuid=current_task()['uuid'], body={'success':True} ).execute() exit(0) diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 12fa9ae30d..4d2a9ba2f8 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -737,13 +737,48 @@ class SynchronizedCollectionBase(CollectionBase): def clone(self): raise NotImplementedError() + @must_be_writable + @synchronized + def add(self, source_obj, target_name, overwrite=False): + """Copy a file or subcollection to this collection. + + :source_obj: + An ArvadosFile, or Subcollection object + + :target_name: + Destination item name. If the target name already exists and is a + file, this will raise an error unless you specify `overwrite=True`. + + :overwrite: + Whether to overwrite target file if it already exists. + + """ + + if target_name in self and not overwrite: + raise IOError((errno.EEXIST, "File already exists")) + + modified_from = None + if target_name in self: + modified_from = self[target_name] + + # Actually make the copy. + dup = source_obj.clone(self) + self._items[target_name] = dup + self._modified = True + + if modified_from: + self.notify(MOD, self, target_name, (modified_from, dup)) + else: + self.notify(ADD, self, target_name, dup) + + @must_be_writable @synchronized def copy(self, source, target_path, source_collection=None, overwrite=False): """Copy a file or subcollection to a new path in this collection. :source: - An ArvadosFile, Subcollection, or string with a path to source file or subcollection + A string with a path to source file or subcollection, or an actual ArvadosFile or Subcollection object. :target_path: Destination file or path. If the target path already exists and is a @@ -781,26 +816,11 @@ class SynchronizedCollectionBase(CollectionBase): target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION) - if target_name in target_dir: - if isinstance(target_dir[target_name], SynchronizedCollectionBase) and sourcecomponents: - target_dir = target_dir[target_name] - target_name = sourcecomponents[-1] - elif not overwrite: - raise IOError((errno.EEXIST, "File already exists")) - - modified_from = None - if target_name in target_dir: - modified_from = target_dir[target_name] + if target_name in target_dir and isinstance(self[target_name], SynchronizedCollectionBase) and sourcecomponents: + target_dir = target_dir[target_name] + target_name = sourcecomponents[-1] - # Actually make the copy. - dup = source_obj.clone(target_dir) - target_dir._items[target_name] = dup - target_dir._modified = True - - if modified_from: - self.notify(MOD, target_dir, target_name, (modified_from, dup)) - else: - self.notify(ADD, target_dir, target_name, dup) + target_dir.add(source_obj, target_name, overwrite) @synchronized def manifest_text(self, stream_name=".", strip=False, normalize=False): @@ -1223,10 +1243,13 @@ class Collection(SynchronizedCollectionBase): the API server. If you want to save a manifest to Keep only, see `save_new()`. - :update: + :merge: Update and merge remote changes before saving. Otherwise, any remote changes will be ignored and overwritten. + :num_retries: + Retry count on API calls (if None, use the collection default) + """ if self.modified(): if not self._has_collection_uuid(): @@ -1260,8 +1283,8 @@ class Collection(SynchronizedCollectionBase): :name: The collection name. - :keep_only: - Only save the manifest to keep, do not create a collection record. + :create_collection_record: + If True, create a collection record. If False, only save the manifest to keep. :owner_uuid: the user, or project uuid that will own this collection. @@ -1272,6 +1295,9 @@ class Collection(SynchronizedCollectionBase): if it conflicts with a collection with the same name and owner. If False, a name conflict will result in an error. + :num_retries: + Retry count on API calls (if None, use the collection default) + """ self._my_block_manager().commit_all() self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries) diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py index cba14adcc7..ff30a7bcd6 100644 --- a/sdk/python/tests/test_collections.py +++ b/sdk/python/tests/test_collections.py @@ -835,6 +835,18 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin): c.remove("count1.txt") self.assertNotIn("count1.txt", c) self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text()) + with self.assertRaises(arvados.errors.ArgumentError): + c.remove("") + + def test_find(self): + c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n') + self.assertIs(c.find("."), c) + self.assertIs(c.find("./count1.txt"), c["count1.txt"]) + self.assertIs(c.find("count1.txt"), c["count1.txt"]) + with self.assertRaises(IOError): + c.find("/.") + with self.assertRaises(arvados.errors.ArgumentError): + c.find("") def test_remove_in_subdir(self): c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n') diff --git a/sdk/python/tests/test_sdk.py b/sdk/python/tests/test_sdk.py index 9b9f3fccef..8c56180059 100644 --- a/sdk/python/tests/test_sdk.py +++ b/sdk/python/tests/test_sdk.py @@ -7,10 +7,11 @@ import arvados.collection class TestSDK(unittest.TestCase): - @mock.patch('arvados.api') @mock.patch('arvados.current_task') @mock.patch('arvados.current_job') - def test_one_task_per_input_file_normalize(self, mock_job, mock_task, mock_api): + def test_one_task_per_input_file_normalize(self, mock_job, mock_task): + mock_api = mock.MagicMock() + # This manifest will be reduced from three lines to one when it is # normalized. nonnormalized_manifest = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt @@ -30,7 +31,7 @@ class TestSDK(unittest.TestCase): 'sequence': 0, } # mock the API client to return a collection with a nonnormalized manifest. - mock_api('v1').collections().get().execute.return_value = { + mock_api.collections().get().execute.return_value = { 'uuid': 'zzzzz-4zz18-mockcollection0', 'portable_data_hash': dummy_hash, 'manifest_text': nonnormalized_manifest, @@ -38,5 +39,5 @@ class TestSDK(unittest.TestCase): # Because one_task_per_input_file normalizes this collection, # it should now create only one job task and not three. - arvados.job_setup.one_task_per_input_file(and_end_task=False) - mock_api('v1').job_tasks().create().execute.assert_called_once_with() + arvados.job_setup.one_task_per_input_file(and_end_task=False, arvapi=mock_api) + mock_api.job_tasks().create().execute.assert_called_once_with() -- 2.30.2