11462: Store CollectionReader objects in a central cache to avoid redundant
[arvados.git] / sdk / cwl / tests / test_container.py
1 import arvados_cwl
2 from arvados_cwl.arvdocker import arv_docker_clear_cache
3 import logging
4 import mock
5 import unittest
6 import os
7 import functools
8 import cwltool.process
9 from schema_salad.ref_resolver import Loader
10 from schema_salad.sourceline import cmap
11
12 from .matcher import JsonDiffMatcher
13
# Quiet the runner loggers to warnings and above, unless ARVADOS_DEBUG is set
# to a non-empty value in the environment.
if not os.environ.get('ARVADOS_DEBUG'):
    logging.getLogger('arvados.cwl-runner').setLevel(logging.WARNING)
    logging.getLogger('arvados.arv-run').setLevel(logging.WARNING)
17
18
19 class TestContainer(unittest.TestCase):
20
21     # The test passes no builder.resources
22     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
23     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
24     def test_run(self, keepdocker):
25         for enable_reuse in (True, False):
26             arv_docker_clear_cache()
27
28             runner = mock.MagicMock()
29             runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
30             runner.ignore_docker_for_reuse = False
31
32             keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
33             runner.api.collections().get().execute.return_value = {
34                 "portable_data_hash": "99999999999999999999999999999993+99"}
35
36             document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
37
38             tool = cmap({
39                 "inputs": [],
40                 "outputs": [],
41                 "baseCommand": "ls",
42                 "arguments": [{"valueFrom": "$(runtime.outdir)"}]
43             })
44             make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
45                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
46             arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
47                                                      basedir="", make_fs_access=make_fs_access, loader=Loader({}))
48             arvtool.formatgraph = None
49             for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_"+str(enable_reuse),
50                                  make_fs_access=make_fs_access, tmpdir="/tmp"):
51                 j.run(enable_reuse=enable_reuse)
52                 runner.api.container_requests().create.assert_called_with(
53                     body=JsonDiffMatcher({
54                         'environment': {
55                             'HOME': '/var/spool/cwl',
56                             'TMPDIR': '/tmp'
57                         },
58                         'name': 'test_run_'+str(enable_reuse),
59                         'runtime_constraints': {
60                             'vcpus': 1,
61                             'ram': 1073741824
62                         },
63                         'use_existing': enable_reuse,
64                         'priority': 1,
65                         'mounts': {
66                             '/var/spool/cwl': {'kind': 'tmp'}
67                         },
68                         'state': 'Committed',
69                         'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
70                         'output_path': '/var/spool/cwl',
71                         'container_image': 'arvados/jobs',
72                         'command': ['ls', '/var/spool/cwl'],
73                         'cwd': '/var/spool/cwl',
74                         'scheduling_parameters': {},
75                         'properties': {},
76                     }))
77
78     # The test passes some fields in builder.resources
79     # For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
80     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
81     def test_resource_requirements(self, keepdocker):
82         arv_docker_clear_cache()
83         runner = mock.MagicMock()
84         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
85         runner.ignore_docker_for_reuse = False
86         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
87
88         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
89         runner.api.collections().get().execute.return_value = {
90             "portable_data_hash": "99999999999999999999999999999993+99"}
91
92         tool = cmap({
93             "inputs": [],
94             "outputs": [],
95             "hints": [{
96                 "class": "ResourceRequirement",
97                 "coresMin": 3,
98                 "ramMin": 3000,
99                 "tmpdirMin": 4000
100             }, {
101                 "class": "http://arvados.org/cwl#RuntimeConstraints",
102                 "keep_cache": 512
103             }, {
104                 "class": "http://arvados.org/cwl#APIRequirement",
105             }, {
106                 "class": "http://arvados.org/cwl#PartitionRequirement",
107                 "partition": "blurb"
108             }],
109             "baseCommand": "ls"
110         })
111         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
112                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
113         arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
114                                                  avsc_names=avsc_names, make_fs_access=make_fs_access,
115                                                  loader=Loader({}))
116         arvtool.formatgraph = None
117         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
118                              make_fs_access=make_fs_access, tmpdir="/tmp"):
119             j.run()
120
121         call_args, call_kwargs = runner.api.container_requests().create.call_args
122
123         call_body_expected = {
124                 'environment': {
125                     'HOME': '/var/spool/cwl',
126                     'TMPDIR': '/tmp'
127                 },
128                 'name': 'test_resource_requirements',
129                 'runtime_constraints': {
130                     'vcpus': 3,
131                     'ram': 3145728000,
132                     'keep_cache_ram': 536870912,
133                     'API': True
134                 },
135                 'use_existing': True,
136                 'priority': 1,
137                 'mounts': {
138                     '/var/spool/cwl': {'kind': 'tmp'}
139                 },
140                 'state': 'Committed',
141                 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
142                 'output_path': '/var/spool/cwl',
143                 'container_image': 'arvados/jobs',
144                 'command': ['ls'],
145                 'cwd': '/var/spool/cwl',
146                 'scheduling_parameters': {
147                     'partitions': ['blurb']
148                 },
149                 'properties': {}
150         }
151
152         call_body = call_kwargs.get('body', None)
153         self.assertNotEqual(None, call_body)
154         for key in call_body:
155             self.assertEqual(call_body_expected.get(key), call_body.get(key))
156
157
158     # The test lists files and directories in an InitialWorkDirRequirement hint and checks that
159     # each entry is copied into a collection and mounted under the output directory; default resources apply.
160     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
161     @mock.patch("arvados.collection.Collection")
162     def test_initial_work_dir(self, collection_mock, keepdocker):
163         arv_docker_clear_cache()
164         runner = mock.MagicMock()
165         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
166         runner.ignore_docker_for_reuse = False
167         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
168
169         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
170         runner.api.collections().get().execute.return_value = {
171             "portable_data_hash": "99999999999999999999999999999993+99"}
172
173         sourcemock = mock.MagicMock()
174         def get_collection_mock(p):
175             if "/" in p:
176                 return (sourcemock, p.split("/", 1)[1])
177             else:
178                 return (sourcemock, "")
179         runner.fs_access.get_collection.side_effect = get_collection_mock
180
181         vwdmock = mock.MagicMock()
182         collection_mock.return_value = vwdmock
183         vwdmock.portable_data_hash.return_value = "99999999999999999999999999999996+99"
184
185         tool = cmap({
186             "inputs": [],
187             "outputs": [],
188             "hints": [{
189                 "class": "InitialWorkDirRequirement",
190                 "listing": [{
191                     "class": "File",
192                     "basename": "foo",
193                     "location": "keep:99999999999999999999999999999995+99/bar"
194                 },
195                 {
196                     "class": "Directory",
197                     "basename": "foo2",
198                     "location": "keep:99999999999999999999999999999995+99"
199                 },
200                 {
201                     "class": "File",
202                     "basename": "filename",
203                     "location": "keep:99999999999999999999999999999995+99/baz/filename"
204                 },
205                 {
206                     "class": "Directory",
207                     "basename": "subdir",
208                     "location": "keep:99999999999999999999999999999995+99/subdir"
209                 }                        ]
210             }],
211             "baseCommand": "ls"
212         })
213         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
214                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
215         arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
216                                                  avsc_names=avsc_names, make_fs_access=make_fs_access,
217                                                  loader=Loader({}))
218         arvtool.formatgraph = None
219         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_initial_work_dir",
220                              make_fs_access=make_fs_access, tmpdir="/tmp"):
221             j.run()
222
223         call_args, call_kwargs = runner.api.container_requests().create.call_args
224
225         vwdmock.copy.assert_has_calls([mock.call('bar', 'foo', source_collection=sourcemock)])
226         vwdmock.copy.assert_has_calls([mock.call('', 'foo2', source_collection=sourcemock)])
227         vwdmock.copy.assert_has_calls([mock.call('baz/filename', 'filename', source_collection=sourcemock)])
228         vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])
229
230         call_body_expected = {
231                 'environment': {
232                     'HOME': '/var/spool/cwl',
233                     'TMPDIR': '/tmp'
234                 },
235                 'name': 'test_initial_work_dir',
236                 'runtime_constraints': {
237                     'vcpus': 1,
238                     'ram': 1073741824
239                 },
240                 'use_existing': True,
241                 'priority': 1,
242                 'mounts': {
243                     '/var/spool/cwl': {'kind': 'tmp'},
244                     '/var/spool/cwl/foo': {
245                         'kind': 'collection',
246                         'path': 'foo',
247                         'portable_data_hash': '99999999999999999999999999999996+99'
248                     },
249                     '/var/spool/cwl/foo2': {
250                         'kind': 'collection',
251                         'path': 'foo2',
252                         'portable_data_hash': '99999999999999999999999999999996+99'
253                     },
254                     '/var/spool/cwl/filename': {
255                         'kind': 'collection',
256                         'path': 'filename',
257                         'portable_data_hash': '99999999999999999999999999999996+99'
258                     },
259                     '/var/spool/cwl/subdir': {
260                         'kind': 'collection',
261                         'path': 'subdir',
262                         'portable_data_hash': '99999999999999999999999999999996+99'
263                     }
264                 },
265                 'state': 'Committed',
266                 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
267                 'output_path': '/var/spool/cwl',
268                 'container_image': 'arvados/jobs',
269                 'command': ['ls'],
270                 'cwd': '/var/spool/cwl',
271                 'scheduling_parameters': {
272                 },
273                 'properties': {}
274         }
275
276         call_body = call_kwargs.get('body', None)
277         self.assertNotEqual(None, call_body)
278         for key in call_body:
279             self.assertEqual(call_body_expected.get(key), call_body.get(key))
280
281
282     # Test redirecting stdin/stdout/stderr
283     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
284     def test_redirects(self, keepdocker):
285         arv_docker_clear_cache()
286
287         runner = mock.MagicMock()
288         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
289         runner.ignore_docker_for_reuse = False
290
291         keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
292         runner.api.collections().get().execute.return_value = {
293             "portable_data_hash": "99999999999999999999999999999993+99"}
294
295         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
296
297         tool = cmap({
298             "inputs": [],
299             "outputs": [],
300             "baseCommand": "ls",
301             "stdout": "stdout.txt",
302             "stderr": "stderr.txt",
303             "stdin": "/keep/99999999999999999999999999999996+99/file.txt",
304             "arguments": [{"valueFrom": "$(runtime.outdir)"}]
305         })
306         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
307                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
308         arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
309                                                  basedir="", make_fs_access=make_fs_access, loader=Loader({}))
310         arvtool.formatgraph = None
311         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_redirect",
312                              make_fs_access=make_fs_access, tmpdir="/tmp"):
313             j.run()
314             runner.api.container_requests().create.assert_called_with(
315                 body=JsonDiffMatcher({
316                     'environment': {
317                         'HOME': '/var/spool/cwl',
318                         'TMPDIR': '/tmp'
319                     },
320                     'name': 'test_run_redirect',
321                     'runtime_constraints': {
322                         'vcpus': 1,
323                         'ram': 1073741824
324                     },
325                     'use_existing': True,
326                     'priority': 1,
327                     'mounts': {
328                         '/var/spool/cwl': {'kind': 'tmp'},
329                         "stderr": {
330                             "kind": "file",
331                             "path": "/var/spool/cwl/stderr.txt"
332                         },
333                         "stdin": {
334                             "kind": "collection",
335                             "path": "file.txt",
336                             "portable_data_hash": "99999999999999999999999999999996+99"
337                         },
338                         "stdout": {
339                             "kind": "file",
340                             "path": "/var/spool/cwl/stdout.txt"
341                         },
342                     },
343                     'state': 'Committed',
344                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
345                     'output_path': '/var/spool/cwl',
346                     'container_image': 'arvados/jobs',
347                     'command': ['ls', '/var/spool/cwl'],
348                     'cwd': '/var/spool/cwl',
349                     'scheduling_parameters': {},
350                     'properties': {},
351                 }))
352
353     @mock.patch("arvados.collection.Collection")
354     def test_done(self, col):
355         api = mock.MagicMock()
356
357         runner = mock.MagicMock()
358         runner.api = api
359         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
360         runner.num_retries = 0
361         runner.ignore_docker_for_reuse = False
362
363         runner.api.containers().get().execute.return_value = {"state":"Complete",
364                                                               "output": "abc+123",
365                                                               "exit_code": 0}
366
367         col().open.return_value = []
368
369         arvjob = arvados_cwl.ArvadosContainer(runner)
370         arvjob.name = "testjob"
371         arvjob.builder = mock.MagicMock()
372         arvjob.output_callback = mock.MagicMock()
373         arvjob.collect_outputs = mock.MagicMock()
374         arvjob.successCodes = [0]
375         arvjob.outdir = "/var/spool/cwl"
376
377         arvjob.collect_outputs.return_value = {"out": "stuff"}
378
379         arvjob.done({
380             "state": "Final",
381             "log_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
382             "output_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
383             "uuid": "zzzzz-xvhdp-zzzzzzzzzzzzzzz",
384             "container_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
385         })
386
387         self.assertFalse(api.collections().create.called)
388
389         arvjob.collect_outputs.assert_called_with("keep:abc+123")
390         arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")