X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/bdbf7c4ee346fd1615f882255b3606d8d5360710..16b5f7275ffa2bd4347134f7269744f4cd4baa2a:/sdk/cwl/tests/test_container.py diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py index de21fc0b92..d331e3552d 100644 --- a/sdk/cwl/tests/test_container.py +++ b/sdk/cwl/tests/test_container.py @@ -18,10 +18,11 @@ import os import functools import cwltool.process import cwltool.secrets +from cwltool.update import INTERNAL_VERSION from schema_salad.ref_resolver import Loader from schema_salad.sourceline import cmap -from .matcher import JsonDiffMatcher +from .matcher import JsonDiffMatcher, StripYAMLComments from .mock_discovery import get_rootDesc if not os.getenv('ARVADOS_DEBUG'): @@ -56,8 +57,11 @@ class CollectionMock(object): class TestContainer(unittest.TestCase): + def setUp(self): + cwltool.process._names = set() + def helper(self, runner, enable_reuse=True): - document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0") + document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema(INTERNAL_VERSION) make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0)) @@ -66,7 +70,7 @@ class TestContainer(unittest.TestCase): "basedir": "", "make_fs_access": make_fs_access, "loader": Loader({}), - "metadata": {"cwlVersion": "v1.0"}}) + "metadata": {"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"}}) runtimeContext = arvados_cwl.context.ArvRuntimeContext( {"work_api": "containers", "basedir": "", @@ -80,6 +84,22 @@ class TestContainer(unittest.TestCase): return loadingContext, runtimeContext + # Helper function to set up the ArvCwlExecutor to use the containers api + # and test that the RuntimeStatusLoggingHandler is set up correctly + def setup_and_test_container_executor_and_logging(self, gcc_mock) : + api = mock.MagicMock() + api._rootDesc = copy.deepcopy(get_rootDesc()) + + # Make sure ArvCwlExecutor thinks it's running inside a container so it + # adds the logging handler that will call runtime_status_update() mock + self.assertFalse(gcc_mock.called) + runner = arvados_cwl.ArvCwlExecutor(api) + self.assertEqual(runner.work_api, 'containers') + root_logger = logging.getLogger('') + handlerClasses = [h.__class__ for h in root_logger.handlers] + self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses) + return runner + # The test passes no builder.resources # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024} @mock.patch("arvados.commands.keepdocker.list_images_in_arv") @@ -307,7 +327,7 @@ class TestContainer(unittest.TestCase): call_args, call_kwargs = runner.api.container_requests().create.call_args vwdmock.copy.assert_has_calls([mock.call('bar', 'foo', source_collection=sourcemock)]) - vwdmock.copy.assert_has_calls([mock.call('', 'foo2', source_collection=sourcemock)]) + vwdmock.copy.assert_has_calls([mock.call('.', 'foo2', source_collection=sourcemock)]) vwdmock.copy.assert_has_calls([mock.call('baz/filename', 'filename', source_collection=sourcemock)]) vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)]) @@ -383,7 +403,7 @@ class TestContainer(unittest.TestCase): runner.api.collections().get().execute.return_value = { "portable_data_hash": "99999999999999999999999999999993+99"} - document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0") + document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema(INTERNAL_VERSION) tool = cmap({ "inputs": [], @@ -496,32 +516,53 @@ class TestContainer(unittest.TestCase): self.assertFalse(api.collections().create.called) self.assertFalse(runner.runtime_status_error.called) - arvjob.collect_outputs.assert_called_with("keep:abc+123") + arvjob.collect_outputs.assert_called_with("keep:abc+123", 0) arvjob.output_callback.assert_called_with({"out": "stuff"}, "success") runner.add_intermediate_output.assert_called_with("zzzzz-4zz18-zzzzzzzzzzzzzz2") + # Test to make sure we dont call runtime_status_update if we already did + # some where higher up in the call stack + @mock.patch("arvados_cwl.util.get_current_container") + def test_recursive_runtime_status_update(self, gcc_mock): + self.setup_and_test_container_executor_and_logging(gcc_mock) + root_logger = logging.getLogger('') + + # get_current_container is invoked when we call runtime_status_update + # so try and log again! + gcc_mock.side_effect = lambda *args: root_logger.error("Second Error") + try: + root_logger.error("First Error") + except RuntimeError: + self.fail("RuntimeStatusLoggingHandler should not be called recursively") + + + # Test to make sure that an exception raised from + # get_current_container doesn't cause the logger to raise an + # exception + @mock.patch("arvados_cwl.util.get_current_container") + def test_runtime_status_get_current_container_exception(self, gcc_mock): + self.setup_and_test_container_executor_and_logging(gcc_mock) + root_logger = logging.getLogger('') + + # get_current_container is invoked when we call + # runtime_status_update, it is going to also raise an + # exception. + gcc_mock.side_effect = Exception("Second Error") + try: + root_logger.error("First Error") + except Exception: + self.fail("Exception in logger should not propagate") + self.assertTrue(gcc_mock.called) + + @mock.patch("arvados_cwl.ArvCwlExecutor.runtime_status_update") @mock.patch("arvados_cwl.util.get_current_container") @mock.patch("arvados.collection.CollectionReader") @mock.patch("arvados.collection.Collection") - def test_child_failure(self, col, reader, gcc_mock): - api = mock.MagicMock() - api._rootDesc = copy.deepcopy(get_rootDesc()) - del api._rootDesc.get('resources')['jobs']['methods']['create'] - - # Set up runner with mocked runtime_status_update() - self.assertFalse(gcc_mock.called) - runtime_status_update = mock.MagicMock() - arvados_cwl.ArvCwlExecutor.runtime_status_update = runtime_status_update - runner = arvados_cwl.ArvCwlExecutor(api) - self.assertEqual(runner.work_api, 'containers') + def test_child_failure(self, col, reader, gcc_mock, rts_mock): + runner = self.setup_and_test_container_executor_and_logging(gcc_mock) - # Make sure ArvCwlExecutor thinks it's running inside a container so it - # adds the logging handler that will call runtime_status_update() mock gcc_mock.return_value = {"uuid" : "zzzzz-dz642-zzzzzzzzzzzzzzz"} self.assertTrue(gcc_mock.called) - root_logger = logging.getLogger('') - handlerClasses = [h.__class__ for h in root_logger.handlers] - self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses) runner.num_retries = 0 runner.ignore_docker_for_reuse = False @@ -565,7 +606,7 @@ class TestContainer(unittest.TestCase): "modified_at": "2017-05-26T12:01:22Z" }) - runtime_status_update.assert_called_with( + rts_mock.assert_called_with( 'error', 'arvados.cwl-runner: [container testjob] (zzzzz-xvhdp-zzzzzzzzzzzzzzz) error log:', ' ** log is empty **' @@ -588,7 +629,7 @@ class TestContainer(unittest.TestCase): "portable_data_hash": "99999999999999999999999999999994+99", "manifest_text": ". 99999999999999999999999999999994+99 0:0:file1 0:0:file2"} - document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0") + document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1") tool = cmap({ "inputs": [ @@ -611,6 +652,7 @@ class TestContainer(unittest.TestCase): "p1": { "class": "Directory", "location": "keep:99999999999999999999999999999994+44", + "http://arvados.org/cwl#collectionUUID": "zzzzz-4zz18-zzzzzzzzzzzzzzz", "listing": [ { "class": "File", @@ -641,7 +683,8 @@ class TestContainer(unittest.TestCase): 'mounts': { "/keep/99999999999999999999999999999994+44": { "kind": "collection", - "portable_data_hash": "99999999999999999999999999999994+44" + "portable_data_hash": "99999999999999999999999999999994+44", + "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz" }, '/tmp': {'kind': 'tmp', "capacity": 1073741824 }, @@ -676,7 +719,7 @@ class TestContainer(unittest.TestCase): runner.api.collections().get().execute.return_value = { "portable_data_hash": "99999999999999999999999999999993+99"} - document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0") + document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1") tool = cmap({"arguments": ["md5sum", "example.conf"], "class": "CommandLineTool", @@ -782,7 +825,7 @@ class TestContainer(unittest.TestCase): "class": "CommandLineTool", "hints": [ { - "class": "http://commonwl.org/cwltool#TimeLimit", + "class": "ToolTimeLimit", "timelimit": 42 } ] @@ -799,3 +842,253 @@ class TestContainer(unittest.TestCase): _, kwargs = runner.api.container_requests().create.call_args self.assertEqual(42, kwargs['body']['scheduling_parameters'].get('max_run_time')) + + +class TestWorkflow(unittest.TestCase): + def setUp(self): + cwltool.process._names = set() + + def helper(self, runner, enable_reuse=True): + document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0") + + make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, + collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0)) + + document_loader.fetcher_constructor = functools.partial(arvados_cwl.CollectionFetcher, api_client=runner.api, fs_access=make_fs_access("")) + document_loader.fetcher = document_loader.fetcher_constructor(document_loader.cache, document_loader.session) + document_loader.fetch_text = document_loader.fetcher.fetch_text + document_loader.check_exists = document_loader.fetcher.check_exists + + loadingContext = arvados_cwl.context.ArvLoadingContext( + {"avsc_names": avsc_names, + "basedir": "", + "make_fs_access": make_fs_access, + "loader": document_loader, + "metadata": {"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"}, + "construct_tool_object": runner.arv_make_tool}) + runtimeContext = arvados_cwl.context.ArvRuntimeContext( + {"work_api": "containers", + "basedir": "", + "name": "test_run_wf_"+str(enable_reuse), + "make_fs_access": make_fs_access, + "tmpdir": "/tmp", + "enable_reuse": enable_reuse, + "priority": 500}) + + return loadingContext, runtimeContext + + # The test passes no builder.resources + # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024} + @mock.patch("arvados.collection.CollectionReader") + @mock.patch("arvados.collection.Collection") + @mock.patch('arvados.commands.keepdocker.list_images_in_arv') + def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader): + arv_docker_clear_cache() + arvados_cwl.add_arv_hints() + + api = mock.MagicMock() + api._rootDesc = get_rootDesc() + + runner = arvados_cwl.executor.ArvCwlExecutor(api) + self.assertEqual(runner.work_api, 'containers') + + list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]] + runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"} + runner.api.collections().list().execute.return_value = {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz", + "portable_data_hash": "99999999999999999999999999999993+99"}]} + + runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz" + runner.ignore_docker_for_reuse = False + runner.num_retries = 0 + runner.secret_store = cwltool.secrets.SecretStore() + + loadingContext, runtimeContext = self.helper(runner) + runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir) + + tool, metadata = loadingContext.loader.resolve_ref("tests/wf/scatter2.cwl") + metadata["cwlVersion"] = tool["cwlVersion"] + + mockc = mock.MagicMock() + mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mockc, *args, **kwargs) + mockcollectionreader().find.return_value = arvados.arvfile.ArvadosFile(mock.MagicMock(), "token.txt") + + arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext) + arvtool.formatgraph = None + it = arvtool.job({}, mock.MagicMock(), runtimeContext) + + next(it).run(runtimeContext) + next(it).run(runtimeContext) + + with open("tests/wf/scatter2_subwf.cwl") as f: + subwf = StripYAMLComments(f.read()).rstrip() + + runner.api.container_requests().create.assert_called_with( + body=JsonDiffMatcher({ + "command": [ + "cwltool", + "--no-container", + "--move-outputs", + "--preserve-entire-environment", + "workflow.cwl", + "cwl.input.yml" + ], + "container_image": "99999999999999999999999999999993+99", + "cwd": "/var/spool/cwl", + "environment": { + "HOME": "/var/spool/cwl", + "TMPDIR": "/tmp" + }, + "mounts": { + "/keep/99999999999999999999999999999999+118": { + "kind": "collection", + "portable_data_hash": "99999999999999999999999999999999+118" + }, + "/tmp": { + "capacity": 1073741824, + "kind": "tmp" + }, + "/var/spool/cwl": { + "capacity": 1073741824, + "kind": "tmp" + }, + "/var/spool/cwl/cwl.input.yml": { + "kind": "collection", + "path": "cwl.input.yml", + "portable_data_hash": "99999999999999999999999999999996+99" + }, + "/var/spool/cwl/workflow.cwl": { + "kind": "collection", + "path": "workflow.cwl", + "portable_data_hash": "99999999999999999999999999999996+99" + }, + "stdout": { + "kind": "file", + "path": "/var/spool/cwl/cwl.output.json" + } + }, + "name": "scatterstep", + "output_name": "Output for step scatterstep", + "output_path": "/var/spool/cwl", + "output_ttl": 0, + "priority": 500, + "properties": {}, + "runtime_constraints": { + "ram": 1073741824, + "vcpus": 1 + }, + "scheduling_parameters": {}, + "secret_mounts": {}, + "state": "Committed", + "use_existing": True + })) + mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)]) + mockc.open().__enter__().write.assert_has_calls([mock.call( +'''{ + "fileblub": { + "basename": "token.txt", + "class": "File", + "location": "/keep/99999999999999999999999999999999+118/token.txt", + "size": 0 + }, + "sleeptime": 5 +}''')]) + + # The test passes no builder.resources + # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024} + @mock.patch("arvados.collection.CollectionReader") + @mock.patch("arvados.collection.Collection") + @mock.patch('arvados.commands.keepdocker.list_images_in_arv') + def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader): + arv_docker_clear_cache() + arvados_cwl.add_arv_hints() + + api = mock.MagicMock() + api._rootDesc = get_rootDesc() + + runner = arvados_cwl.executor.ArvCwlExecutor(api) + self.assertEqual(runner.work_api, 'containers') + + list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]] + runner.api.collections().get().execute.return_value = {"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz", + "portable_data_hash": "99999999999999999999999999999993+99"} + runner.api.collections().list().execute.return_value = {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz", + "portable_data_hash": "99999999999999999999999999999993+99"}]} + + runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz" + runner.ignore_docker_for_reuse = False + runner.num_retries = 0 + runner.secret_store = cwltool.secrets.SecretStore() + + loadingContext, runtimeContext = self.helper(runner) + runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir) + loadingContext.do_update = True + tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl") + + mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mock.MagicMock(), *args, **kwargs) + + arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext) + arvtool.formatgraph = None + it = arvtool.job({}, mock.MagicMock(), runtimeContext) + + next(it).run(runtimeContext) + next(it).run(runtimeContext) + + with open("tests/wf/echo-subwf.cwl") as f: + subwf = StripYAMLComments(f.read()) + + runner.api.container_requests().create.assert_called_with( + body=JsonDiffMatcher({ + 'output_ttl': 0, + 'environment': {'HOME': '/var/spool/cwl', 'TMPDIR': '/tmp'}, + 'scheduling_parameters': {}, + 'name': u'echo-subwf', + 'secret_mounts': {}, + 'runtime_constraints': {'API': True, 'vcpus': 3, 'ram': 1073741824}, + 'properties': {}, + 'priority': 500, + 'mounts': { + '/var/spool/cwl/cwl.input.yml': { + 'portable_data_hash': '99999999999999999999999999999996+99', + 'kind': 'collection', + 'path': 'cwl.input.yml' + }, + '/var/spool/cwl/workflow.cwl': { + 'portable_data_hash': '99999999999999999999999999999996+99', + 'kind': 'collection', + 'path': 'workflow.cwl' + }, + 'stdout': { + 'path': '/var/spool/cwl/cwl.output.json', + 'kind': 'file' + }, + '/tmp': { + 'kind': 'tmp', + 'capacity': 1073741824 + }, '/var/spool/cwl': { + 'kind': 'tmp', + 'capacity': 3221225472 + } + }, + 'state': 'Committed', + 'output_path': '/var/spool/cwl', + 'container_image': '99999999999999999999999999999993+99', + 'command': [ + u'cwltool', + u'--no-container', + u'--move-outputs', + u'--preserve-entire-environment', + u'workflow.cwl', + u'cwl.input.yml' + ], + 'use_existing': True, + 'output_name': u'Output for step echo-subwf', + 'cwd': '/var/spool/cwl' + })) + + def test_default_work_api(self): + arvados_cwl.add_arv_hints() + + api = mock.MagicMock() + api._rootDesc = copy.deepcopy(get_rootDesc()) + runner = arvados_cwl.executor.ArvCwlExecutor(api) + self.assertEqual(runner.work_api, 'containers')