Merge branch '9397-cwl-initialworkdir-crunchv2' closes #9397
[arvados.git] / sdk / cwl / tests / test_container.py
index 8ca5398e82f7ba4606bc7ac212e2a61e987872a9..68cb3273fd147d526d30801ba4955e26bfe08529 100644 (file)
@@ -1,4 +1,5 @@
 import arvados_cwl
+from arvados_cwl.arvdocker import arv_docker_clear_cache
 import logging
 import mock
 import unittest
@@ -6,8 +7,9 @@ import os
 import functools
 import cwltool.process
 from schema_salad.ref_resolver import Loader
+from schema_salad.sourceline import cmap
 
-from schema_salad.ref_resolver import Loader
+from .matcher import JsonDiffMatcher
 
 if not os.getenv('ARVADOS_DEBUG'):
     logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
@@ -21,6 +23,8 @@ class TestContainer(unittest.TestCase):
     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
     def test_run(self, keepdocker):
         for enable_reuse in (True, False):
+            arv_docker_clear_cache()
+
             runner = mock.MagicMock()
             runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
             runner.ignore_docker_for_reuse = False
@@ -31,12 +35,12 @@ class TestContainer(unittest.TestCase):
 
             document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
-            tool = {
+            tool = cmap({
                 "inputs": [],
                 "outputs": [],
                 "baseCommand": "ls",
                 "arguments": [{"valueFrom": "$(runtime.outdir)"}]
-            }
+            })
             make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
             arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
                                                      basedir="", make_fs_access=make_fs_access, loader=Loader({}))
@@ -45,7 +49,7 @@ class TestContainer(unittest.TestCase):
                                  make_fs_access=make_fs_access, tmpdir="/tmp"):
                 j.run(enable_reuse=enable_reuse)
                 runner.api.container_requests().create.assert_called_with(
-                    body={
+                    body=JsonDiffMatcher({
                         'environment': {
                             'HOME': '/var/spool/cwl',
                             'TMPDIR': '/tmp'
@@ -65,13 +69,16 @@ class TestContainer(unittest.TestCase):
                         'output_path': '/var/spool/cwl',
                         'container_image': '99999999999999999999999999999993+99',
                         'command': ['ls', '/var/spool/cwl'],
-                        'cwd': '/var/spool/cwl'
-                    })
+                        'cwd': '/var/spool/cwl',
+                        'scheduling_parameters': {},
+                        'properties': {},
+                    }))
 
     # The test passes some fields in builder.resources
     # For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
     def test_resource_requirements(self, keepdocker):
+        arv_docker_clear_cache()
         runner = mock.MagicMock()
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.ignore_docker_for_reuse = False
@@ -81,7 +88,7 @@ class TestContainer(unittest.TestCase):
         runner.api.collections().get().execute.return_value = {
             "portable_data_hash": "99999999999999999999999999999993+99"}
 
-        tool = {
+        tool = cmap({
             "inputs": [],
             "outputs": [],
             "hints": [{
@@ -99,7 +106,7 @@ class TestContainer(unittest.TestCase):
                 "partition": "blurb"
             }],
             "baseCommand": "ls"
-        }
+        })
         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
         arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
                                                  avsc_names=avsc_names, make_fs_access=make_fs_access,
@@ -109,8 +116,9 @@ class TestContainer(unittest.TestCase):
                              make_fs_access=make_fs_access, tmpdir="/tmp"):
             j.run()
 
-        runner.api.container_requests().create.assert_called_with(
-            body={
+        call_args, call_kwargs = runner.api.container_requests().create.call_args
+
+        call_body_expected = {
                 'environment': {
                     'HOME': '/var/spool/cwl',
                     'TMPDIR': '/tmp'
@@ -119,9 +127,8 @@ class TestContainer(unittest.TestCase):
                 'runtime_constraints': {
                     'vcpus': 3,
                     'ram': 3145728000,
-                    'keep_cache_ram': 512,
-                    'API': True,
-                    'partition': ['blurb']
+                    'keep_cache_ram': 536870912,
+                    'API': True
                 },
                 'use_existing': True,
                 'priority': 1,
@@ -133,67 +140,156 @@ class TestContainer(unittest.TestCase):
                 'output_path': '/var/spool/cwl',
                 'container_image': '99999999999999999999999999999993+99',
                 'command': ['ls'],
-                'cwd': '/var/spool/cwl'
-            })
+                'cwd': '/var/spool/cwl',
+                'scheduling_parameters': {
+                    'partitions': ['blurb']
+                },
+                'properties': {}
+        }
+
+        call_body = call_kwargs.get('body', None)
+        self.assertNotEqual(None, call_body)
+        for key in call_body:
+            self.assertEqual(call_body_expected.get(key), call_body.get(key))
 
-    @mock.patch("arvados.collection.Collection")
-    def test_done(self, col):
-        api = mock.MagicMock()
 
+    # The test passes some fields in builder.resources
+    # For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    @mock.patch("arvados.collection.Collection")
+    def test_initial_work_dir(self, collection_mock, keepdocker):
+        arv_docker_clear_cache()
         runner = mock.MagicMock()
-        runner.api = api
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.num_retries = 0
         runner.ignore_docker_for_reuse = False
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
-        col().open.return_value = []
-        api.collections().list().execute.side_effect = ({"items": []},
-                                                        {"items": [{"manifest_text": "XYZ"}]})
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+        runner.api.collections().get().execute.return_value = {
+            "portable_data_hash": "99999999999999999999999999999993+99"}
 
-        arvjob = arvados_cwl.ArvadosContainer(runner)
-        arvjob.name = "testjob"
-        arvjob.builder = mock.MagicMock()
-        arvjob.output_callback = mock.MagicMock()
-        arvjob.collect_outputs = mock.MagicMock()
-        arvjob.successCodes = [0]
-        arvjob.outdir = "/var/spool/cwl"
+        sourcemock = mock.MagicMock()
+        def get_collection_mock(p):
+            if "/" in p:
+                return (sourcemock, p.split("/", 1)[1])
+            else:
+                return (sourcemock, "")
+        runner.fs_access.get_collection.side_effect = get_collection_mock
 
-        arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
-            "exit_code": 0
+        vwdmock = mock.MagicMock()
+        collection_mock.return_value = vwdmock
+        vwdmock.portable_data_hash.return_value = "99999999999999999999999999999996+99"
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "hints": [{
+                "class": "InitialWorkDirRequirement",
+                "listing": [{
+                    "class": "File",
+                    "basename": "foo",
+                    "location": "keep:99999999999999999999999999999995+99/bar"
+                },
+                {
+                    "class": "Directory",
+                    "basename": "foo2",
+                    "location": "keep:99999999999999999999999999999995+99"
+                },
+                {
+                    "class": "File",
+                    "basename": "filename",
+                    "location": "keep:99999999999999999999999999999995+99/baz/filename"
+                },
+                {
+                    "class": "Directory",
+                    "basename": "subdir",
+                    "location": "keep:99999999999999999999999999999995+99/subdir"
+                }                        ]
+            }],
+            "baseCommand": "ls"
         })
+        make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
+                                                 avsc_names=avsc_names, make_fs_access=make_fs_access,
+                                                 loader=Loader({}))
+        arvtool.formatgraph = None
+        for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_initial_work_dir",
+                             make_fs_access=make_fs_access, tmpdir="/tmp"):
+            j.run()
+
+        call_args, call_kwargs = runner.api.container_requests().create.call_args
 
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                          ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                          ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0),
-            mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999993+99']],
-                 select=['manifest_text']),
-            mock.call().execute(num_retries=0)])
-
-        api.collections().create.assert_called_with(
-            ensure_unique_name=True,
-            body={'portable_data_hash': '99999999999999999999999999999993+99',
-                  'manifest_text': 'XYZ',
-                  'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                  'name': 'Output 9999999 of testjob'})
+        vwdmock.copy.assert_has_calls([mock.call('bar', 'foo', source_collection=sourcemock)])
+        vwdmock.copy.assert_has_calls([mock.call('', 'foo2', source_collection=sourcemock)])
+        vwdmock.copy.assert_has_calls([mock.call('baz/filename', 'filename', source_collection=sourcemock)])
+        vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])
+
+        call_body_expected = {
+                'environment': {
+                    'HOME': '/var/spool/cwl',
+                    'TMPDIR': '/tmp'
+                },
+                'name': 'test_initial_work_dir',
+                'runtime_constraints': {
+                    'vcpus': 1,
+                    'ram': 1073741824
+                },
+                'use_existing': True,
+                'priority': 1,
+                'mounts': {
+                    '/var/spool/cwl': {'kind': 'tmp'},
+                    '/var/spool/cwl/foo': {
+                        'kind': 'collection',
+                        'path': 'foo',
+                        'portable_data_hash': '99999999999999999999999999999996+99'
+                    },
+                    '/var/spool/cwl/foo2': {
+                        'kind': 'collection',
+                        'path': 'foo2',
+                        'portable_data_hash': '99999999999999999999999999999996+99'
+                    },
+                    '/var/spool/cwl/filename': {
+                        'kind': 'collection',
+                        'path': 'filename',
+                        'portable_data_hash': '99999999999999999999999999999996+99'
+                    },
+                    '/var/spool/cwl/subdir': {
+                        'kind': 'collection',
+                        'path': 'subdir',
+                        'portable_data_hash': '99999999999999999999999999999996+99'
+                    }
+                },
+                'state': 'Committed',
+                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                'output_path': '/var/spool/cwl',
+                'container_image': '99999999999999999999999999999993+99',
+                'command': ['ls'],
+                'cwd': '/var/spool/cwl',
+                'scheduling_parameters': {
+                },
+                'properties': {}
+        }
+
+        call_body = call_kwargs.get('body', None)
+        self.assertNotEqual(None, call_body)
+        for key in call_body:
+            self.assertEqual(call_body_expected.get(key), call_body.get(key))
 
     @mock.patch("arvados.collection.Collection")
-    def test_done_use_existing_collection(self, col):
+    def test_done(self, col):
         api = mock.MagicMock()
 
         runner = mock.MagicMock()
         runner.api = api
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.num_retries = 0
+        runner.ignore_docker_for_reuse = False
+
+        runner.api.containers().get().execute.return_value = {"state":"Complete",
+                                                              "output": "abc+123",
+                                                              "exit_code": 0}
 
         col().open.return_value = []
-        api.collections().list().execute.side_effect = ({"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},)
 
         arvjob = arvados_cwl.ArvadosContainer(runner)
         arvjob.name = "testjob"
@@ -203,19 +299,17 @@ class TestContainer(unittest.TestCase):
         arvjob.successCodes = [0]
         arvjob.outdir = "/var/spool/cwl"
 
+        arvjob.collect_outputs.return_value = {"out": "stuff"}
+
         arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
-            "exit_code": 0
+            "state": "Final",
+            "log_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
+            "output_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
+            "uuid": "zzzzz-xvhdp-zzzzzzzzzzzzzzz",
+            "container_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         })
 
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                               ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                               ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0)])
-
         self.assertFalse(api.collections().create.called)
+
+        arvjob.collect_outputs.assert_called_with("keep:abc+123")
+        arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")