Correct typo in field name to 'memoryRetryMultiplier'
[arvados.git] / sdk / cwl / arvados_cwl / arvcontainer.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from future import standard_library
6 standard_library.install_aliases()
7 from builtins import str
8
9 import logging
10 import json
11 import os
12 import urllib.request, urllib.parse, urllib.error
13 import time
14 import datetime
15 import ciso8601
16 import uuid
17 import math
18 import re
19
20 import arvados_cwl.util
21 import ruamel.yaml
22
23 from cwltool.errors import WorkflowException
24 from cwltool.process import UnsupportedRequirement, shortname
25 from cwltool.utils import aslist, adjustFileObjs, adjustDirObjs, visit_class
26 from cwltool.job import JobBase
27
28 import arvados.collection
29
30 from .arvdocker import arv_docker_get_image
31 from . import done
32 from .runner import Runner, arvados_jobs_image, packed_workflow, trim_anonymous_location, remove_redundant_fields, make_builder
33 from .fsaccess import CollectionFetcher
34 from .pathmapper import NoFollowPathMapper, trim_listing
35 from .perf import Perf
36 from ._version import __version__
37
38 logger = logging.getLogger('arvados.cwl-runner')
39 metrics = logging.getLogger('arvados.cwl-runner.metrics')
40
def cleanup_name_for_collection(name):
    """Return `name` with every "/" replaced by a single space."""
    pieces = name.split("/")
    return " ".join(pieces)
43
44 class ArvadosContainer(JobBase):
45     """Submit and manage a Crunch container request for executing a CWL CommandLineTool."""
46
47     def __init__(self, runner, job_runtime,
48                  builder,   # type: Builder
49                  joborder,  # type: Dict[Text, Union[Dict[Text, Any], List, Text]]
50                  make_path_mapper,  # type: Callable[..., PathMapper]
51                  requirements,      # type: List[Dict[Text, Text]]
52                  hints,     # type: List[Dict[Text, Text]]
53                  name       # type: Text
54     ):
55         super(ArvadosContainer, self).__init__(builder, joborder, make_path_mapper, requirements, hints, name)
56         self.arvrunner = runner
57         self.job_runtime = job_runtime
58         self.running = False
59         self.uuid = None
60         self.attempt_count = 0
61
62     def update_pipeline_component(self, r):
63         pass
64
65     def _required_env(self):
66         env = {}
67         env["HOME"] = self.outdir
68         env["TMPDIR"] = self.tmpdir
69         return env
70
71     def run(self, toplevelRuntimeContext):
72         # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
73         # which calls makeJobRunner() to get a new ArvadosContainer
74         # object.  The fields that define execution such as
75         # command_line, environment, etc are set on the
76         # ArvadosContainer object by CommandLineTool.job() before
77         # run() is called.
78
79         runtimeContext = self.job_runtime
80
81         if runtimeContext.submit_request_uuid:
82             container_request = self.arvrunner.api.container_requests().get(
83                 uuid=runtimeContext.submit_request_uuid
84             ).execute(num_retries=self.arvrunner.num_retries)
85         else:
86             container_request = {}
87
88         container_request["command"] = self.command_line
89         container_request["name"] = self.name
90         container_request["output_path"] = self.outdir
91         container_request["cwd"] = self.outdir
92         container_request["priority"] = runtimeContext.priority
93         container_request["state"] = "Uncommitted"
94         container_request.setdefault("properties", {})
95
96         container_request["properties"]["cwl_input"] = self.joborder
97
98         runtime_constraints = {}
99
100         if runtimeContext.project_uuid:
101             container_request["owner_uuid"] = runtimeContext.project_uuid
102
103         if self.arvrunner.secret_store.has_secret(self.command_line):
104             raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")
105
106         if self.arvrunner.secret_store.has_secret(self.environment):
107             raise WorkflowException("Secret material leaked in environment, only file literals may contain secrets")
108
109         resources = self.builder.resources
110         if resources is not None:
111             runtime_constraints["vcpus"] = math.ceil(resources.get("cores", 1))
112             runtime_constraints["ram"] = math.ceil(resources.get("ram") * 2**20)
113
114         mounts = {
115             self.outdir: {
116                 "kind": "tmp",
117                 "capacity": math.ceil(resources.get("outdirSize", 0) * 2**20)
118             },
119             self.tmpdir: {
120                 "kind": "tmp",
121                 "capacity": math.ceil(resources.get("tmpdirSize", 0) * 2**20)
122             }
123         }
124         secret_mounts = {}
125         scheduling_parameters = {}
126
127         rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
128         rf.sort(key=lambda k: k.resolved)
129         prevdir = None
130         for resolved, target, tp, stg in rf:
131             if not stg:
132                 continue
133             if prevdir and target.startswith(prevdir):
134                 continue
135             if tp == "Directory":
136                 targetdir = target
137             else:
138                 targetdir = os.path.dirname(target)
139             sp = resolved.split("/", 1)
140             pdh = sp[0][5:]   # remove "keep:"
141             mounts[targetdir] = {
142                 "kind": "collection",
143                 "portable_data_hash": pdh
144             }
145             if pdh in self.pathmapper.pdh_to_uuid:
146                 mounts[targetdir]["uuid"] = self.pathmapper.pdh_to_uuid[pdh]
147             if len(sp) == 2:
148                 if tp == "Directory":
149                     path = sp[1]
150                 else:
151                     path = os.path.dirname(sp[1])
152                 if path and path != "/":
153                     mounts[targetdir]["path"] = path
154             prevdir = targetdir + "/"
155
156         intermediate_collection_info = arvados_cwl.util.get_intermediate_collection_info(self.name, runtimeContext.current_container, runtimeContext.intermediate_output_ttl)
157
158         with Perf(metrics, "generatefiles %s" % self.name):
159             if self.generatefiles["listing"]:
160                 vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
161                                                     keep_client=self.arvrunner.keep_client,
162                                                     num_retries=self.arvrunner.num_retries)
163                 generatemapper = NoFollowPathMapper(self.generatefiles["listing"], "", "",
164                                                     separateDirs=False)
165
166                 sorteditems = sorted(generatemapper.items(), key=lambda n: n[1].target)
167
168                 logger.debug("generatemapper is %s", sorteditems)
169
170                 with Perf(metrics, "createfiles %s" % self.name):
171                     for f, p in sorteditems:
172                         if not p.target:
173                             continue
174
175                         if p.target.startswith("/"):
176                             dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
177                         else:
178                             dst = p.target
179
180                         if p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
181                             if p.resolved.startswith("_:"):
182                                 vwd.mkdirs(dst)
183                             else:
184                                 source, path = self.arvrunner.fs_access.get_collection(p.resolved)
185                                 vwd.copy(path or ".", dst, source_collection=source)
186                         elif p.type == "CreateFile":
187                             if self.arvrunner.secret_store.has_secret(p.resolved):
188                                 mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
189                                 secret_mounts[mountpoint] = {
190                                     "kind": "text",
191                                     "content": self.arvrunner.secret_store.retrieve(p.resolved)
192                                 }
193                             else:
194                                 with vwd.open(dst, "w") as n:
195                                     n.write(p.resolved)
196
197                 def keepemptydirs(p):
198                     if isinstance(p, arvados.collection.RichCollectionBase):
199                         if len(p) == 0:
200                             p.open(".keep", "w").close()
201                         else:
202                             for c in p:
203                                 keepemptydirs(p[c])
204
205                 keepemptydirs(vwd)
206
207                 if not runtimeContext.current_container:
208                     runtimeContext.current_container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
209                 vwd.save_new(name=intermediate_collection_info["name"],
210                              owner_uuid=runtimeContext.project_uuid,
211                              ensure_unique_name=True,
212                              trash_at=intermediate_collection_info["trash_at"],
213                              properties=intermediate_collection_info["properties"])
214
215                 prev = None
216                 for f, p in sorteditems:
217                     if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
218                         (prev is not None and p.target.startswith(prev))):
219                         continue
220                     if p.target.startswith("/"):
221                         dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
222                     else:
223                         dst = p.target
224                     mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
225                     mounts[mountpoint] = {"kind": "collection",
226                                           "portable_data_hash": vwd.portable_data_hash(),
227                                           "path": dst}
228                     if p.type.startswith("Writable"):
229                         mounts[mountpoint]["writable"] = True
230                     prev = p.target + "/"
231
232         container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
233         if self.environment:
234             container_request["environment"].update(self.environment)
235
236         if self.stdin:
237             sp = self.stdin[6:].split("/", 1)
238             mounts["stdin"] = {"kind": "collection",
239                                 "portable_data_hash": sp[0],
240                                 "path": sp[1]}
241
242         if self.stderr:
243             mounts["stderr"] = {"kind": "file",
244                                 "path": "%s/%s" % (self.outdir, self.stderr)}
245
246         if self.stdout:
247             mounts["stdout"] = {"kind": "file",
248                                 "path": "%s/%s" % (self.outdir, self.stdout)}
249
250         (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
251
252         container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
253                                                                     docker_req,
254                                                                     runtimeContext.pull_image,
255                                                                     runtimeContext)
256
257         network_req, _ = self.get_requirement("NetworkAccess")
258         if network_req:
259             runtime_constraints["API"] = network_req["networkAccess"]
260
261         api_req, _ = self.get_requirement("http://arvados.org/cwl#APIRequirement")
262         if api_req:
263             runtime_constraints["API"] = True
264
265         use_disk_cache = (self.arvrunner.api.config()["Containers"].get("DefaultKeepCacheRAM", 0) == 0)
266
267         keep_cache_type_req, _ = self.get_requirement("http://arvados.org/cwl#KeepCacheTypeRequirement")
268         if keep_cache_type_req:
269             if "keepCacheType" in keep_cache_type_req:
270                 if keep_cache_type_req["keepCacheType"] == "ram_cache":
271                     use_disk_cache = False
272
273         runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
274         if runtime_req:
275             if "keep_cache" in runtime_req:
276                 if use_disk_cache:
277                     # If DefaultKeepCacheRAM is zero it means we should use disk cache.
278                     runtime_constraints["keep_cache_disk"] = math.ceil(runtime_req["keep_cache"] * 2**20)
279                 else:
280                     runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20)
281             if "outputDirType" in runtime_req:
282                 if runtime_req["outputDirType"] == "local_output_dir":
283                     # Currently the default behavior.
284                     pass
285                 elif runtime_req["outputDirType"] == "keep_output_dir":
286                     mounts[self.outdir]= {
287                         "kind": "collection",
288                         "writable": True
289                     }
290
291         partition_req, _ = self.get_requirement("http://arvados.org/cwl#PartitionRequirement")
292         if partition_req:
293             scheduling_parameters["partitions"] = aslist(partition_req["partition"])
294
295         intermediate_output_req, _ = self.get_requirement("http://arvados.org/cwl#IntermediateOutput")
296         if intermediate_output_req:
297             self.output_ttl = intermediate_output_req["outputTTL"]
298         else:
299             self.output_ttl = self.arvrunner.intermediate_output_ttl
300
301         if self.output_ttl < 0:
302             raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
303
304
305         if self.arvrunner.api._rootDesc["revision"] >= "20210628":
306             storage_class_req, _ = self.get_requirement("http://arvados.org/cwl#OutputStorageClass")
307             if storage_class_req and storage_class_req.get("intermediateStorageClass"):
308                 container_request["output_storage_classes"] = aslist(storage_class_req["intermediateStorageClass"])
309             else:
310                 container_request["output_storage_classes"] = runtimeContext.intermediate_storage_classes.strip().split(",")
311
312         cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
313         if cuda_req:
314             runtime_constraints["cuda"] = {
315                 "device_count": resources.get("cudaDeviceCount", 1),
316                 "driver_version": cuda_req["cudaVersionMin"],
317                 "hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0]
318             }
319
320         if runtimeContext.enable_preemptible is False:
321             scheduling_parameters["preemptible"] = False
322         else:
323             preemptible_req, _ = self.get_requirement("http://arvados.org/cwl#UsePreemptible")
324             if preemptible_req:
325                 scheduling_parameters["preemptible"] = preemptible_req["usePreemptible"]
326             elif runtimeContext.enable_preemptible is True:
327                 scheduling_parameters["preemptible"] = True
328             elif runtimeContext.enable_preemptible is None:
329                 pass
330
331         if self.timelimit is not None and self.timelimit > 0:
332             scheduling_parameters["max_run_time"] = self.timelimit
333
334         extra_submit_params = {}
335         if runtimeContext.submit_runner_cluster:
336             extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster
337
338         container_request["output_name"] = cleanup_name_for_collection("Output from step %s" % (self.name))
339         container_request["output_ttl"] = self.output_ttl
340         container_request["mounts"] = mounts
341         container_request["secret_mounts"] = secret_mounts
342         container_request["runtime_constraints"] = runtime_constraints
343         container_request["scheduling_parameters"] = scheduling_parameters
344
345         enable_reuse = runtimeContext.enable_reuse
346         if enable_reuse:
347             reuse_req, _ = self.get_requirement("WorkReuse")
348             if reuse_req:
349                 enable_reuse = reuse_req["enableReuse"]
350             reuse_req, _ = self.get_requirement("http://arvados.org/cwl#ReuseRequirement")
351             if reuse_req:
352                 enable_reuse = reuse_req["enableReuse"]
353         container_request["use_existing"] = enable_reuse
354
355         properties_req, _ = self.get_requirement("http://arvados.org/cwl#ProcessProperties")
356         if properties_req:
357             for pr in properties_req["processProperties"]:
358                 container_request["properties"][pr["propertyName"]] = self.builder.do_eval(pr["propertyValue"])
359
360         output_properties_req, _ = self.get_requirement("http://arvados.org/cwl#OutputCollectionProperties")
361         if output_properties_req:
362             if self.arvrunner.api._rootDesc["revision"] >= "20220510":
363                 container_request["output_properties"] = {}
364                 for pr in output_properties_req["outputProperties"]:
365                     container_request["output_properties"][pr["propertyName"]] = self.builder.do_eval(pr["propertyValue"])
366             else:
367                 logger.warning("%s API revision is %s, revision %s is required to support setting properties on output collections.",
368                                self.arvrunner.label(self), self.arvrunner.api._rootDesc["revision"], "20220510")
369
370         ram_multiplier = [1]
371
372         oom_retry_req, _ = self.get_requirement("http://arvados.org/cwl#OutOfMemoryRetry")
373         if oom_retry_req:
374             if oom_retry_req.get('memoryRetryMultiplier'):
375                 ram_multiplier.append(oom_retry_req.get('memoryRetryMultiplier'))
376             elif oom_retry_req.get('memoryRetryMultipler'):
377                 ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler'))
378
379         if runtimeContext.runnerjob.startswith("arvwf:"):
380             wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]
381             wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
382             if container_request["name"] == "main":
383                 container_request["name"] = wfrecord["name"]
384             container_request["properties"]["template_uuid"] = wfuuid
385
386         if self.attempt_count == 0:
387             self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)
388
389         try:
390             ram = runtime_constraints["ram"]
391
392             self.uuid = runtimeContext.submit_request_uuid
393
394             for i in ram_multiplier:
395                 runtime_constraints["ram"] = ram * i
396
397                 if self.uuid:
398                     response = self.arvrunner.api.container_requests().update(
399                         uuid=self.uuid,
400                         body=container_request,
401                         **extra_submit_params
402                     ).execute(num_retries=self.arvrunner.num_retries)
403                 else:
404                     response = self.arvrunner.api.container_requests().create(
405                         body=container_request,
406                         **extra_submit_params
407                     ).execute(num_retries=self.arvrunner.num_retries)
408                     self.uuid = response["uuid"]
409
410                 if response["container_uuid"] is not None:
411                     break
412
413             if response["container_uuid"] is None:
414                 runtime_constraints["ram"] = ram * ram_multiplier[self.attempt_count]
415
416             container_request["state"] = "Committed"
417             response = self.arvrunner.api.container_requests().update(
418                 uuid=self.uuid,
419                 body=container_request,
420                 **extra_submit_params
421             ).execute(num_retries=self.arvrunner.num_retries)
422
423             self.arvrunner.process_submitted(self)
424             self.attempt_count += 1
425
426             if response["state"] == "Final":
427                 logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
428             else:
429                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
430         except Exception as e:
431             logger.exception("%s error submitting container\n%s", self.arvrunner.label(self), e)
432             logger.debug("Container request was %s", container_request)
433             self.output_callback({}, "permanentFail")
434
435     def out_of_memory_retry(self, record, container):
436         oom_retry_req, _ = self.get_requirement("http://arvados.org/cwl#OutOfMemoryRetry")
437         if oom_retry_req is None:
438             return False
439
440         # Sometimes it gets killed with no warning
441         if container["exit_code"] == 137:
442             return True
443
444         logc = arvados.collection.CollectionReader(record["log_uuid"],
445                                                    api_client=self.arvrunner.api,
446                                                    keep_client=self.arvrunner.keep_client,
447                                                    num_retries=self.arvrunner.num_retries)
448
449         loglines = [""]
450         def callback(v1, v2, v3):
451             loglines[0] = v3
452
453         done.logtail(logc, callback, "", maxlen=1000)
454
455         # Check allocation failure
456         oom_matches = oom_retry_req.get('memoryErrorRegex') or r'(bad_alloc|out ?of ?memory|memory ?error|container using over 9.% of memory)'
457         if re.search(oom_matches, loglines[0], re.IGNORECASE | re.MULTILINE):
458             return True
459
460         return False
461
    def done(self, record):
        """Handle a finished container request and report the step result.

        `record` is the container request record; this method reads its
        "container_uuid", "log_uuid", "output_uuid", "modified_at",
        "uuid" and "properties" entries.

        The container record is fetched and its exit code is mapped to a
        CWL process status.  If the first attempt failed permanently and
        `out_of_memory_retry()` says so, the step is resubmitted through
        `run(None)` and this method returns without calling the output
        callback.  In every other case `self.output_callback(outputs,
        processStatus)` is called exactly once from the `finally`
        clause, including when an exception was caught (in which case
        the status is "permanentFail").
        """
        outputs = {}
        retried = False
        rcode = None
        try:
            container = self.arvrunner.api.containers().get(
                uuid=record["container_uuid"]
            ).execute(num_retries=self.arvrunner.num_retries)
            if container["state"] == "Complete":
                rcode = container["exit_code"]
                # Explicit success/temporaryFail/permanentFail code
                # lists take precedence, in that order; otherwise exit
                # code 0 is success and anything else a permanent
                # failure.
                if self.successCodes and rcode in self.successCodes:
                    processStatus = "success"
                elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                    processStatus = "temporaryFail"
                elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                    processStatus = "permanentFail"
                elif rcode == 0:
                    processStatus = "success"
                else:
                    processStatus = "permanentFail"

                # Retry only after the first attempt (attempt_count is
                # incremented by run() on each successful submission).
                if processStatus == "permanentFail" and self.attempt_count == 1 and self.out_of_memory_retry(record, container):
                    logger.warning("%s Container failed with out of memory error, retrying with more RAM.",
                                 self.arvrunner.label(self))
                    # Clear both UUIDs so that run() creates a new
                    # container request instead of updating this one.
                    self.job_runtime.submit_request_uuid = None
                    self.uuid = None
                    self.run(None)
                    # Setting this before returning makes the finally
                    # clause skip the output callback.
                    retried = True
                    return

                if rcode == 137:
                    logger.warning("%s Container may have been killed for using too much RAM.  Try resubmitting with a higher 'ramMin' or use the arv:OutOfMemoryRetry feature.",
                                 self.arvrunner.label(self))
            else:
                # Any container state other than "Complete" is treated
                # as a permanent failure.
                processStatus = "permanentFail"

            # On failure, log the tail of the container log collection.
            if processStatus == "permanentFail" and record["log_uuid"]:
                logc = arvados.collection.CollectionReader(record["log_uuid"],
                                                           api_client=self.arvrunner.api,
                                                           keep_client=self.arvrunner.keep_client,
                                                           num_retries=self.arvrunner.num_retries)
                label = self.arvrunner.label(self)
                done.logtail(
                    logc, logger.error,
                    "%s (%s) error log:" % (label, record["uuid"]), maxlen=40, include_crunchrun=(rcode is None or rcode > 127))

            if record["output_uuid"]:
                if self.arvrunner.trash_intermediate or self.arvrunner.intermediate_output_ttl:
                    # Compute the trash time to avoid requesting the collection record.
                    trash_at = ciso8601.parse_datetime_as_naive(record["modified_at"]) + datetime.timedelta(0, self.arvrunner.intermediate_output_ttl)
                    aftertime = " at %s" % trash_at.strftime("%Y-%m-%d %H:%M:%S UTC") if self.arvrunner.intermediate_output_ttl else ""
                    orpart = ", or" if self.arvrunner.trash_intermediate and self.arvrunner.intermediate_output_ttl else ""
                    oncomplete = " upon successful completion of the workflow" if self.arvrunner.trash_intermediate else ""
                    logger.info("%s Intermediate output %s (%s) will be trashed%s%s%s." % (
                        self.arvrunner.label(self), record["output_uuid"], container["output"], aftertime, orpart, oncomplete))
                self.arvrunner.add_intermediate_output(record["output_uuid"])

            if container["output"]:
                outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")

            # Store the collected outputs on the container request as
            # the "cwl_output" property, alongside a copy of the
            # existing properties.
            properties = record["properties"].copy()
            properties["cwl_output"] = outputs
            self.arvrunner.api.container_requests().update(
                uuid=self.uuid,
                body={"container_request": {"properties": properties}}
            ).execute(num_retries=self.arvrunner.num_retries)
        except WorkflowException as e:
            # Only include a stack trace if in debug mode.
            # A stack trace may obfuscate more useful output about the workflow.
            logger.error("%s unable to collect output from %s:\n%s",
                         self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
            processStatus = "permanentFail"
        except Exception:
            logger.exception("%s while getting output object:", self.arvrunner.label(self))
            processStatus = "permanentFail"
        finally:
            # Report the result unless the step was resubmitted above.
            if not retried:
                self.output_callback(outputs, processStatus)
540
541
542 class RunnerContainer(Runner):
543     """Submit and manage a container that runs arvados-cwl-runner."""
544
545     def arvados_job_spec(self, runtimeContext, git_info):
546         """Create an Arvados container request for this workflow.
547
548         The returned dict can be used to create a container passed as
549         the +body+ argument to container_requests().create().
550         """
551
552         adjustDirObjs(self.job_order, trim_listing)
553         visit_class(self.job_order, ("File", "Directory"), trim_anonymous_location)
554         visit_class(self.job_order, ("File", "Directory"), remove_redundant_fields)
555
556         secret_mounts = {}
557         for param in sorted(self.job_order.keys()):
558             if self.secret_store.has_secret(self.job_order[param]):
559                 mnt = "/secrets/s%d" % len(secret_mounts)
560                 secret_mounts[mnt] = {
561                     "kind": "text",
562                     "content": self.secret_store.retrieve(self.job_order[param])
563                 }
564                 self.job_order[param] = {"$include": mnt}
565
566         container_image = arvados_jobs_image(self.arvrunner, self.jobs_image, runtimeContext)
567
568         workflow_runner_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
569         if workflow_runner_req and workflow_runner_req.get("acrContainerImage"):
570             container_image = workflow_runner_req.get("acrContainerImage")
571
572         container_req = {
573             "name": self.name,
574             "output_path": "/var/spool/cwl",
575             "cwd": "/var/spool/cwl",
576             "priority": self.priority,
577             "state": "Committed",
578             "container_image": container_image,
579             "mounts": {
580                 "/var/lib/cwl/cwl.input.json": {
581                     "kind": "json",
582                     "content": self.job_order
583                 },
584                 "stdout": {
585                     "kind": "file",
586                     "path": "/var/spool/cwl/cwl.output.json"
587                 },
588                 "/var/spool/cwl": {
589                     "kind": "collection",
590                     "writable": True
591                 }
592             },
593             "secret_mounts": secret_mounts,
594             "runtime_constraints": {
595                 "vcpus": math.ceil(self.submit_runner_cores),
596                 "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)),
597                 "API": True
598             },
599             "use_existing": self.reuse_runner,
600             "properties": {}
601         }
602
603         if self.embedded_tool.tool.get("id", "").startswith("keep:"):
604             sp = self.embedded_tool.tool["id"].split('/')
605             workflowcollection = sp[0][5:]
606             workflowname = "/".join(sp[1:])
607             workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
608             container_req["mounts"]["/var/lib/cwl/workflow"] = {
609                 "kind": "collection",
610                 "portable_data_hash": "%s" % workflowcollection
611             }
612         elif self.embedded_tool.tool.get("id", "").startswith("arvwf:"):
613             uuid, frg = urllib.parse.urldefrag(self.embedded_tool.tool["id"])
614             workflowpath = "/var/lib/cwl/workflow.json#" + frg
615             packedtxt = self.loadingContext.loader.fetch_text(uuid)
616             yaml = ruamel.yaml.YAML(typ='safe', pure=True)
617             packed = yaml.load(packedtxt)
618             container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
619                 "kind": "json",
620                 "content": packed
621             }
622             container_req["properties"]["template_uuid"] = self.embedded_tool.tool["id"][6:33]
623         elif self.embedded_tool.tool.get("id", "").startswith("file:"):
624             raise WorkflowException("Tool id '%s' is a local file but expected keep: or arvwf:" % self.embedded_tool.tool.get("id"))
625         else:
626             main = self.loadingContext.loader.idx["_:main"]
627             if main.get("id") == "_:main":
628                 del main["id"]
629             workflowpath = "/var/lib/cwl/workflow.json#main"
630             container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
631                 "kind": "json",
632                 "content": main
633             }
634
635         container_req["properties"].update({k.replace("http://arvados.org/cwl#", "arv:"): v for k, v in git_info.items()})
636
637         properties_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#ProcessProperties")
638         if properties_req:
639             builder = make_builder(self.job_order, self.embedded_tool.hints, self.embedded_tool.requirements, runtimeContext, self.embedded_tool.metadata)
640             for pr in properties_req["processProperties"]:
641                 container_req["properties"][pr["propertyName"]] = builder.do_eval(pr["propertyValue"])
642
643         # --local means execute the workflow instead of submitting a container request
644         # --api=containers means use the containers API
645         # --no-log-timestamps means don't add timestamps (the logging infrastructure does this)
646         # --disable-validate because we already validated so don't need to do it again
647         # --eval-timeout is the timeout for javascript invocation
648         # --parallel-task-count is the number of threads to use for job submission
649         # --enable/disable-reuse sets desired job reuse
650         # --collection-cache-size sets aside memory to store collections
651         command = ["arvados-cwl-runner",
652                    "--local",
653                    "--api=containers",
654                    "--no-log-timestamps",
655                    "--disable-validate",
656                    "--disable-color",
657                    "--eval-timeout=%s" % self.arvrunner.eval_timeout,
658                    "--thread-count=%s" % self.arvrunner.thread_count,
659                    "--enable-reuse" if self.enable_reuse else "--disable-reuse",
660                    "--collection-cache-size=%s" % self.collection_cache_size]
661
662         if self.output_name:
663             command.append("--output-name=" + self.output_name)
664             container_req["output_name"] = self.output_name
665
666         if self.output_tags:
667             command.append("--output-tags=" + self.output_tags)
668
669         if runtimeContext.debug:
670             command.append("--debug")
671
672         if runtimeContext.storage_classes != "default" and runtimeContext.storage_classes:
673             command.append("--storage-classes=" + runtimeContext.storage_classes)
674
675         if runtimeContext.intermediate_storage_classes != "default" and runtimeContext.intermediate_storage_classes:
676             command.append("--intermediate-storage-classes=" + runtimeContext.intermediate_storage_classes)
677
678         if runtimeContext.on_error:
679             command.append("--on-error=" + self.on_error)
680
681         if runtimeContext.intermediate_output_ttl:
682             command.append("--intermediate-output-ttl=%d" % runtimeContext.intermediate_output_ttl)
683
684         if runtimeContext.trash_intermediate:
685             command.append("--trash-intermediate")
686
687         if runtimeContext.project_uuid:
688             command.append("--project-uuid="+runtimeContext.project_uuid)
689
690         if self.enable_dev:
691             command.append("--enable-dev")
692
693         if runtimeContext.enable_preemptible is True:
694             command.append("--enable-preemptible")
695
696         if runtimeContext.enable_preemptible is False:
697             command.append("--disable-preemptible")
698
699         if runtimeContext.varying_url_params:
700             command.append("--varying-url-params="+runtimeContext.varying_url_params)
701
702         if runtimeContext.prefer_cached_downloads:
703             command.append("--prefer-cached-downloads")
704
705         if self.fast_parser:
706             command.append("--fast-parser")
707
708         command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"])
709
710         container_req["command"] = command
711
712         return container_req
713
714
715     def run(self, runtimeContext):
716         runtimeContext.keepprefix = "keep:"
717         job_spec = self.arvados_job_spec(runtimeContext, self.git_info)
718         if runtimeContext.project_uuid:
719             job_spec["owner_uuid"] = runtimeContext.project_uuid
720
721         extra_submit_params = {}
722         if runtimeContext.submit_runner_cluster:
723             extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster
724
725         if runtimeContext.submit_request_uuid:
726             if "cluster_id" in extra_submit_params:
727                 # Doesn't make sense for "update" and actually fails
728                 del extra_submit_params["cluster_id"]
729             response = self.arvrunner.api.container_requests().update(
730                 uuid=runtimeContext.submit_request_uuid,
731                 body=job_spec,
732                 **extra_submit_params
733             ).execute(num_retries=self.arvrunner.num_retries)
734         else:
735             response = self.arvrunner.api.container_requests().create(
736                 body=job_spec,
737                 **extra_submit_params
738             ).execute(num_retries=self.arvrunner.num_retries)
739
740         self.uuid = response["uuid"]
741         self.arvrunner.process_submitted(self)
742
743         logger.info("%s submitted container_request %s", self.arvrunner.label(self), response["uuid"])
744
745         workbench2 = self.arvrunner.api.config()["Services"]["Workbench2"]["ExternalURL"]
746         if workbench2:
747             url = "{}processes/{}".format(workbench2, response["uuid"])
748             logger.info("Monitor workflow progress at %s", url)
749
750
751     def done(self, record):
752         try:
753             container = self.arvrunner.api.containers().get(
754                 uuid=record["container_uuid"]
755             ).execute(num_retries=self.arvrunner.num_retries)
756             container["log"] = record["log_uuid"]
757         except Exception:
758             logger.exception("%s while getting runner container", self.arvrunner.label(self))
759             self.arvrunner.output_callback({}, "permanentFail")
760         else:
761             super(RunnerContainer, self).done(container)