19464: Record git info as properties
[arvados.git] / sdk / cwl / arvados_cwl / executor.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import division
6 from builtins import next
7 from builtins import object
8 from builtins import str
9 from future.utils import viewvalues, viewitems
10
11 import argparse
12 import logging
13 import os
14 import sys
15 import threading
16 import copy
17 import json
18 import re
19 from functools import partial
20 import subprocess
21 import time
22 import urllib
23
24 from cwltool.errors import WorkflowException
25 import cwltool.workflow
26 from schema_salad.sourceline import SourceLine
27 import schema_salad.validate as validate
28 from schema_salad.ref_resolver import file_uri, uri_file_path
29
30 import arvados
31 import arvados.config
32 from arvados.keep import KeepClient
33 from arvados.errors import ApiError
34
35 import arvados_cwl.util
36 from .arvcontainer import RunnerContainer, cleanup_name_for_collection
37 from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps, make_builder
38 from .arvtool import ArvadosCommandTool, validate_cluster_target, ArvadosExpressionTool
39 from .arvworkflow import ArvadosWorkflow, upload_workflow
40 from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache, pdh_size
41 from .perf import Perf
42 from .pathmapper import NoFollowPathMapper
43 from cwltool.task_queue import TaskQueue
44 from .context import ArvLoadingContext, ArvRuntimeContext
45 from ._version import __version__
46
47 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
48 from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing, visit_class, aslist
49 from cwltool.command_line_tool import compute_checksums
50 from cwltool.load_tool import load_tool
51
# Logger used for general runner messages throughout this module.
logger = logging.getLogger('arvados.cwl-runner')
# Separate logger that is handed to Perf() blocks for timing measurements.
metrics = logging.getLogger('arvados.cwl-runner.metrics')

# NOTE(review): presumably the default priority for submitted container
# requests; it is not referenced in this part of the file -- confirm against callers.
DEFAULT_PRIORITY = 500
56
class RuntimeStatusLoggingHandler(logging.Handler):
    """
    Logging handler that forwards WARNING and ERROR records to a runtime
    status update function (used to report status on runner containers).
    """
    def __init__(self, runtime_status_update_func):
        super(RuntimeStatusLoggingHandler, self).__init__()
        # Callable invoked as func(kind, message) or func(kind, message, detail).
        self.runtime_status_update = runtime_status_update_func
        # Re-entrancy flag: set while an update is in flight so that records
        # logged during the update itself are not forwarded again.
        self.updatingRuntimeStatus = False

    def emit(self, record):
        """Forward ``record`` as an 'error' or 'warning' status; ignore lower levels."""
        if record.levelno >= logging.ERROR:
            kind = 'error'
        elif record.levelno >= logging.WARNING:
            kind = 'warning'
        else:
            return

        if self.updatingRuntimeStatus is True:
            return

        self.updatingRuntimeStatus = True
        try:
            # For a multi-line message the first line becomes the status
            # and everything after the first newline becomes the detail.
            status, newline, detail = record.getMessage().partition('\n')
            summary = "%s: %s" % (record.name, status)
            if newline:
                self.runtime_status_update(kind, summary, detail)
            else:
                self.runtime_status_update(kind, summary)
        finally:
            self.updatingRuntimeStatus = False
93
94
95 class ArvCwlExecutor(object):
96     """Execute a CWL tool or workflow, submit work (using containers API),
97     wait for them to complete, and report output.
98
99     """
100
    def __init__(self, api_client,
                 arvargs=None,
                 keep_client=None,
                 num_retries=4,
                 thread_count=4,
                 stdout=sys.stdout):
        """Initialize executor state, the collection cache and the loading/runtime contexts.

        api_client -- Arvados API client, stored as self.api.
        arvargs -- parsed arguments namespace; when None a minimal namespace is
            built (work_api, output_name, output_tags and collection_cache_size
            set to None, thread_count set to 1).
        keep_client -- Keep client to use; when None a KeepClient is created
            from api_client and num_retries.
        num_retries -- retry count stored as self.num_retries.
        thread_count -- NOTE(review): not read in this method;
            self.thread_count is taken from arvargs.thread_count instead.
        stdout -- stream stored as self.stdout.

        Raises Exception when the API server's discovery document does not
        offer a usable "containers" create method, or when arvargs.work_api
        names an API other than the expected ones.
        """

        if arvargs is None:
            arvargs = argparse.Namespace()
            arvargs.work_api = None
            arvargs.output_name = None
            arvargs.output_tags = None
            arvargs.thread_count = 1
            arvargs.collection_cache_size = None

        self.api = api_client
        # Maps uuid -> submitted process object (see process_submitted / process_done).
        self.processes = {}
        # Condition guarding self.processes and the final output/status fields.
        self.workflow_eval_lock = threading.Condition(threading.RLock())
        self.final_output = None
        self.final_status = None
        self.num_retries = num_retries
        self.uuid = None
        # Event that tells the poll_states() loop to exit.
        self.stop_polling = threading.Event()
        self.poll_api = None
        self.pipeline = None
        self.final_output_collection = None
        self.output_name = arvargs.output_name
        self.output_tags = arvargs.output_tags
        self.project_uuid = None
        self.intermediate_output_ttl = 0
        self.intermediate_output_collections = []
        self.trash_intermediate = False
        self.thread_count = arvargs.thread_count
        # Value used by poll_states() as the wait between polls (passed to Event.wait()).
        self.poll_interval = 12
        self.loadingContext = None
        self.should_estimate_cache_size = True
        self.fs_access = None
        self.secret_store = None
        self.stdout = stdout

        if keep_client is not None:
            self.keep_client = keep_client
        else:
            self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)

        # An explicit cache size is multiplied by 1024*1024 and disables
        # cache size estimation; otherwise the cap is 256*1024*1024.
        if arvargs.collection_cache_size:
            collection_cache_size = arvargs.collection_cache_size*1024*1024
            self.should_estimate_cache_size = False
        else:
            collection_cache_size = 256*1024*1024

        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries,
                                                cap=collection_cache_size)

        self.fetcher_constructor = partial(CollectionFetcher,
                                           api_client=self.api,
                                           fs_access=CollectionFsAccess("", collection_cache=self.collection_cache),
                                           num_retries=self.num_retries)

        # Pick the first expected API that the discovery document says can
        # create records and that matches arvargs.work_api (if one was given).
        self.work_api = None
        expected_api = ["containers"]
        for api in expected_api:
            try:
                methods = self.api._rootDesc.get('resources')[api]['methods']
                if ('httpMethod' in methods['create'] and
                    (arvargs.work_api == api or arvargs.work_api is None)):
                    self.work_api = api
                    break
            except KeyError:
                pass

        if not self.work_api:
            if arvargs.work_api is None:
                raise Exception("No supported APIs")
            else:
                raise Exception("Unsupported API '%s', expected one of %s" % (arvargs.work_api, expected_api))

        # NOTE(review): expected_api only contains "containers", so work_api
        # can never be "jobs" here and this branch looks unreachable.  It also
        # calls the site-provided builtin exit() rather than sys.exit().
        if self.work_api == "jobs":
            logger.error("""
*******************************
The 'jobs' API is no longer supported.
*******************************""")
            exit(1)

        self.loadingContext = ArvLoadingContext(vars(arvargs))
        self.loadingContext.fetcher_constructor = self.fetcher_constructor
        self.loadingContext.resolver = partial(collectionResolver, self.api, num_retries=self.num_retries)
        self.loadingContext.construct_tool_object = self.arv_make_tool

        # Add a custom logging handler to the root logger for runtime status reporting
        # if running inside a container
        if arvados_cwl.util.get_current_container(self.api, self.num_retries, logger):
            root_logger = logging.getLogger('')

            # Remove existing RuntimeStatusLoggingHandlers if they exist
            handlers = [h for h in root_logger.handlers if not isinstance(h, RuntimeStatusLoggingHandler)]
            root_logger.handlers = handlers

            handler = RuntimeStatusLoggingHandler(self.runtime_status_update)
            root_logger.addHandler(handler)

        self.toplevel_runtimeContext = ArvRuntimeContext(vars(arvargs))
        self.toplevel_runtimeContext.make_fs_access = partial(CollectionFsAccess,
                                                     collection_cache=self.collection_cache)

        validate_cluster_target(self, self.toplevel_runtimeContext)
207
208
209     def arv_make_tool(self, toolpath_object, loadingContext):
210         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
211             return ArvadosCommandTool(self, toolpath_object, loadingContext)
212         elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
213             return ArvadosWorkflow(self, toolpath_object, loadingContext)
214         elif "class" in toolpath_object and toolpath_object["class"] == "ExpressionTool":
215             return ArvadosExpressionTool(self, toolpath_object, loadingContext)
216         else:
217             raise Exception("Unknown tool %s" % toolpath_object.get("class"))
218
219     def output_callback(self, out, processStatus):
220         with self.workflow_eval_lock:
221             if processStatus == "success":
222                 logger.info("Overall process status is %s", processStatus)
223                 state = "Complete"
224             else:
225                 logger.error("Overall process status is %s", processStatus)
226                 state = "Failed"
227             if self.pipeline:
228                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
229                                                         body={"state": state}).execute(num_retries=self.num_retries)
230             self.final_status = processStatus
231             self.final_output = out
232             self.workflow_eval_lock.notifyAll()
233
234
235     def start_run(self, runnable, runtimeContext):
236         self.task_queue.add(partial(runnable.run, runtimeContext),
237                             self.workflow_eval_lock, self.stop_polling)
238
239     def process_submitted(self, container):
240         with self.workflow_eval_lock:
241             self.processes[container.uuid] = container
242
243     def process_done(self, uuid, record):
244         with self.workflow_eval_lock:
245             j = self.processes[uuid]
246             logger.info("%s %s is %s", self.label(j), uuid, record["state"])
247             self.task_queue.add(partial(j.done, record),
248                                 self.workflow_eval_lock, self.stop_polling)
249             del self.processes[uuid]
250
251     def runtime_status_update(self, kind, message, detail=None):
252         """
253         Updates the runtime_status field on the runner container.
254         Called when there's a need to report errors, warnings or just
255         activity statuses, for example in the RuntimeStatusLoggingHandler.
256         """
257
258         if kind not in ('error', 'warning'):
259             # Ignore any other status kind
260             return
261
262         with self.workflow_eval_lock:
263             current = None
264             try:
265                 current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
266             except Exception as e:
267                 logger.info("Couldn't get current container: %s", e)
268             if current is None:
269                 return
270             runtime_status = current.get('runtime_status', {})
271
272             original_updatemessage = updatemessage = runtime_status.get(kind, "")
273             if not updatemessage:
274                 updatemessage = message
275
276             # Subsequent messages tacked on in detail
277             original_updatedetail = updatedetail = runtime_status.get(kind+'Detail', "")
278             maxlines = 40
279             if updatedetail.count("\n") < maxlines:
280                 if updatedetail:
281                     updatedetail += "\n"
282                 updatedetail += message + "\n"
283
284                 if detail:
285                     updatedetail += detail + "\n"
286
287                 if updatedetail.count("\n") >= maxlines:
288                     updatedetail += "\nSome messages may have been omitted.  Check the full log."
289
290             if updatemessage == original_updatemessage and updatedetail == original_updatedetail:
291                 # don't waste time doing an update if nothing changed
292                 # (usually because we exceeded the max lines)
293                 return
294
295             runtime_status.update({
296                 kind: updatemessage,
297                 kind+'Detail': updatedetail,
298             })
299
300             try:
301                 self.api.containers().update(uuid=current['uuid'],
302                                             body={
303                                                 'runtime_status': runtime_status,
304                                             }).execute(num_retries=self.num_retries)
305             except Exception as e:
306                 logger.info("Couldn't update runtime_status: %s", e)
307
308     def wrapped_callback(self, cb, obj, st):
309         with self.workflow_eval_lock:
310             cb(obj, st)
311             self.workflow_eval_lock.notifyAll()
312
313     def get_wrapped_callback(self, cb):
314         return partial(self.wrapped_callback, cb)
315
316     def on_message(self, event):
317         if event.get("object_uuid") in self.processes and event["event_type"] == "update":
318             uuid = event["object_uuid"]
319             if event["properties"]["new_attributes"]["state"] == "Running":
320                 with self.workflow_eval_lock:
321                     j = self.processes[uuid]
322                     if j.running is False:
323                         j.running = True
324                         j.update_pipeline_component(event["properties"]["new_attributes"])
325                         logger.info("%s %s is Running", self.label(j), uuid)
326             elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled", "Final"):
327                 self.process_done(uuid, event["properties"]["new_attributes"])
328
329     def label(self, obj):
330         return "[%s %s]" % (self.work_api[0:-1], obj.name)
331
332     def poll_states(self):
333         """Poll status of containers listed in the processes dict.
334
335         Runs in a separate thread.
336         """
337
338         try:
339             remain_wait = self.poll_interval
340             while True:
341                 if remain_wait > 0:
342                     self.stop_polling.wait(remain_wait)
343                 if self.stop_polling.is_set():
344                     break
345                 with self.workflow_eval_lock:
346                     keys = list(self.processes)
347                 if not keys:
348                     remain_wait = self.poll_interval
349                     continue
350
351                 begin_poll = time.time()
352                 if self.work_api == "containers":
353                     table = self.poll_api.container_requests()
354
355                 pageSize = self.poll_api._rootDesc.get('maxItemsPerResponse', 1000)
356
357                 while keys:
358                     page = keys[:pageSize]
359                     try:
360                         proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
361                     except Exception:
362                         logger.exception("Error checking states on API server: %s")
363                         remain_wait = self.poll_interval
364                         continue
365
366                     for p in proc_states["items"]:
367                         self.on_message({
368                             "object_uuid": p["uuid"],
369                             "event_type": "update",
370                             "properties": {
371                                 "new_attributes": p
372                             }
373                         })
374                     keys = keys[pageSize:]
375
376                 finish_poll = time.time()
377                 remain_wait = self.poll_interval - (finish_poll - begin_poll)
378         except:
379             logger.exception("Fatal error in state polling thread.")
380             with self.workflow_eval_lock:
381                 self.processes.clear()
382                 self.workflow_eval_lock.notifyAll()
383         finally:
384             self.stop_polling.set()
385
386     def add_intermediate_output(self, uuid):
387         if uuid:
388             self.intermediate_output_collections.append(uuid)
389
390     def trash_intermediate_output(self):
391         logger.info("Cleaning up intermediate output collections")
392         for i in self.intermediate_output_collections:
393             try:
394                 self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
395             except Exception:
396                 logger.warning("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
397             except (KeyboardInterrupt, SystemExit):
398                 break
399
400     def check_features(self, obj, parentfield=""):
401         if isinstance(obj, dict):
402             if obj.get("class") == "DockerRequirement":
403                 if obj.get("dockerOutputDirectory"):
404                     if not obj.get("dockerOutputDirectory").startswith('/'):
405                         raise SourceLine(obj, "dockerOutputDirectory", validate.ValidationException).makeError(
406                             "Option 'dockerOutputDirectory' must be an absolute path.")
407             if obj.get("class") == "InplaceUpdateRequirement":
408                 if obj["inplaceUpdate"] and parentfield == "requirements":
409                     raise SourceLine(obj, "class", UnsupportedRequirement).makeError("InplaceUpdateRequirement not supported for keep collections.")
410             for k,v in viewitems(obj):
411                 self.check_features(v, parentfield=k)
412         elif isinstance(obj, list):
413             for i,v in enumerate(obj):
414                 with SourceLine(obj, i, UnsupportedRequirement, logger.isEnabledFor(logging.DEBUG)):
415                     self.check_features(v, parentfield=parentfield)
416
417     def make_output_collection(self, name, storage_classes, tagsString, output_properties, outputObj):
418         outputObj = copy.deepcopy(outputObj)
419
420         files = []
421         def capture(fileobj):
422             files.append(fileobj)
423
424         adjustDirObjs(outputObj, capture)
425         adjustFileObjs(outputObj, capture)
426
427         generatemapper = NoFollowPathMapper(files, "", "", separateDirs=False)
428
429         final = arvados.collection.Collection(api_client=self.api,
430                                               keep_client=self.keep_client,
431                                               num_retries=self.num_retries)
432
433         for k,v in generatemapper.items():
434             if v.type == "Directory" and v.resolved.startswith("_:"):
435                     continue
436             if v.type == "CreateFile" and (k.startswith("_:") or v.resolved.startswith("_:")):
437                 with final.open(v.target, "wb") as f:
438                     f.write(v.resolved.encode("utf-8"))
439                     continue
440
441             if not v.resolved.startswith("keep:"):
442                 raise Exception("Output source is not in keep or a literal")
443             sp = v.resolved.split("/")
444             srccollection = sp[0][5:]
445             try:
446                 reader = self.collection_cache.get(srccollection)
447                 srcpath = urllib.parse.unquote("/".join(sp[1:]) if len(sp) > 1 else ".")
448                 final.copy(srcpath, v.target, source_collection=reader, overwrite=False)
449             except arvados.errors.ArgumentError as e:
450                 logger.error("Creating CollectionReader for '%s' '%s': %s", k, v, e)
451                 raise
452             except IOError as e:
453                 logger.error("While preparing output collection: %s", e)
454                 raise
455
456         def rewrite(fileobj):
457             fileobj["location"] = generatemapper.mapper(fileobj["location"]).target
458             for k in ("listing", "contents", "nameext", "nameroot", "dirname"):
459                 if k in fileobj:
460                     del fileobj[k]
461
462         adjustDirObjs(outputObj, rewrite)
463         adjustFileObjs(outputObj, rewrite)
464
465         with final.open("cwl.output.json", "w") as f:
466             res = str(json.dumps(outputObj, sort_keys=True, indent=4, separators=(',',': '), ensure_ascii=False))
467             f.write(res)
468
469
470         final.save_new(name=name, owner_uuid=self.project_uuid, storage_classes=storage_classes,
471                        ensure_unique_name=True, properties=output_properties)
472
473         logger.info("Final output collection %s \"%s\" (%s)", final.portable_data_hash(),
474                     final.api_response()["name"],
475                     final.manifest_locator())
476
477         final_uuid = final.manifest_locator()
478         tags = tagsString.split(',')
479         for tag in tags:
480              self.api.links().create(body={
481                 "head_uuid": final_uuid, "link_class": "tag", "name": tag
482                 }).execute(num_retries=self.num_retries)
483
484         def finalcollection(fileobj):
485             fileobj["location"] = "keep:%s/%s" % (final.portable_data_hash(), fileobj["location"])
486
487         adjustDirObjs(outputObj, finalcollection)
488         adjustFileObjs(outputObj, finalcollection)
489
490         return (outputObj, final)
491
492     def set_crunch_output(self):
493         if self.work_api == "containers":
494             current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
495             if current is None:
496                 return
497             try:
498                 self.api.containers().update(uuid=current['uuid'],
499                                              body={
500                                                  'output': self.final_output_collection.portable_data_hash(),
501                                                  'output_properties': self.final_output_collection.get_properties(),
502                                              }).execute(num_retries=self.num_retries)
503                 self.api.collections().update(uuid=self.final_output_collection.manifest_locator(),
504                                               body={
505                                                   'is_trashed': True
506                                               }).execute(num_retries=self.num_retries)
507             except Exception:
508                 logger.exception("Setting container output")
509                 raise
510
511     def apply_reqs(self, job_order_object, tool):
512         if "https://w3id.org/cwl/cwl#requirements" in job_order_object:
513             if tool.metadata.get("http://commonwl.org/cwltool#original_cwlVersion") == 'v1.0':
514                 raise WorkflowException(
515                     "`cwl:requirements` in the input object is not part of CWL "
516                     "v1.0. You can adjust to use `cwltool:overrides` instead; or you "
517                     "can set the cwlVersion to v1.1 or greater and re-run with "
518                     "--enable-dev.")
519             job_reqs = job_order_object["https://w3id.org/cwl/cwl#requirements"]
520             for req in job_reqs:
521                 tool.requirements.append(req)
522
523     def get_git_info(self, tool):
524         in_a_git_repo = False
525         cwd = None
526         filepath = None
527
528         if tool.tool["id"].startswith("file://"):
529             # check if git is installed
530             try:
531                 filepath = uri_file_path(tool.tool["id"])
532                 cwd = os.path.dirname(filepath)
533                 subprocess.run(["git", "log", "--format=%H", "-n1", "HEAD"], cwd=cwd, check=True, capture_output=True, text=True)
534                 in_a_git_repo = True
535             except Exception as e:
536                 pass
537
538         gitproperties = {}
539
540         if in_a_git_repo:
541             git_commit = subprocess.run(["git", "log", "--format=%H", "-n1", "HEAD"], cwd=cwd, capture_output=True, text=True).stdout
542             git_date = subprocess.run(["git", "log", "--format=%cD", "-n1", "HEAD"], cwd=cwd, capture_output=True, text=True).stdout
543             git_committer = subprocess.run(["git", "log", "--format=%cn <%ce>", "-n1", "HEAD"], cwd=cwd, capture_output=True, text=True).stdout
544             git_branch = subprocess.run(["git", "branch", "--show-current"], cwd=cwd, capture_output=True, text=True).stdout
545             git_origin = subprocess.run(["git", "remote", "get-url", "origin"], cwd=cwd, capture_output=True, text=True).stdout
546             git_status = subprocess.run(["git", "status", "--untracked-files=no", "--porcelain"], cwd=cwd, capture_output=True, text=True).stdout
547             git_describe = subprocess.run(["git", "describe", "--always"], cwd=cwd, capture_output=True, text=True).stdout
548             git_toplevel = subprocess.run(["git", "rev-parse", "--show-toplevel"], cwd=cwd, capture_output=True, text=True).stdout
549             git_path = filepath[len(git_toplevel):]
550
551             gitproperties = {
552                 "http://arvados.org/cwl#gitCommit": git_commit.strip(),
553                 "http://arvados.org/cwl#gitDate": git_date.strip(),
554                 "http://arvados.org/cwl#gitCommitter": git_committer.strip(),
555                 "http://arvados.org/cwl#gitBranch": git_branch.strip(),
556                 "http://arvados.org/cwl#gitOrigin": git_origin.strip(),
557                 "http://arvados.org/cwl#gitStatus": git_status.strip(),
558                 "http://arvados.org/cwl#gitDescribe": git_describe.strip(),
559                 "http://arvados.org/cwl#gitPath": git_path.strip(),
560             }
561         else:
562             for g in ("http://arvados.org/cwl#gitCommit",
563                       "http://arvados.org/cwl#gitDate",
564                       "http://arvados.org/cwl#gitCommitter",
565                       "http://arvados.org/cwl#gitBranch",
566                       "http://arvados.org/cwl#gitOrigin",
567                       "http://arvados.org/cwl#gitStatus",
568                       "http://arvados.org/cwl#gitDescribe",
569                       "http://arvados.org/cwl#gitPath"):
570                 if g in tool.metadata:
571                     gitproperties[g] = tool.metadata[g]
572
573         return gitproperties
574
575     def set_container_request_properties(self, container, properties):
576         resp = self.api.container_requests().list(filters=[["container_uuid", "=", container["uuid"]]], select=["uuid", "properties"]).execute(num_retries=self.num_retries)
577         for cr in resp["item"]:
578             cr["properties"].update({k.replace("http://arvados.org/cwl#", "arv:"): v for k, v in properties.items()})
579             self.api.container_requests().update(uuid=cr["uuid"], body={"container_request": {"properties": cr["properties"]}}).execute(num_retries=self.num_retries)
580
581     def arv_executor(self, updated_tool, job_order, runtimeContext, logger=None):
582         self.debug = runtimeContext.debug
583
584         git_info = self.get_git_info(updated_tool)
585         if git_info:
586             logger.info("Git provenance")
587             for g in git_info:
588                 if git_info[g]:
589                     logger.info("  %s: %s", g.split("#", 1)[1], git_info[g])
590
591         workbench1 = self.api.config()["Services"]["Workbench1"]["ExternalURL"]
592         workbench2 = self.api.config()["Services"]["Workbench2"]["ExternalURL"]
593         controller = self.api.config()["Services"]["Controller"]["ExternalURL"]
594         logger.info("Using cluster %s (%s)", self.api.config()["ClusterID"], workbench2 or workbench1 or controller)
595
596         updated_tool.visit(self.check_features)
597
598         self.pipeline = None
599         self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
600         self.secret_store = runtimeContext.secret_store
601
602         self.trash_intermediate = runtimeContext.trash_intermediate
603         if self.trash_intermediate and self.work_api != "containers":
604             raise Exception("--trash-intermediate is only supported with --api=containers.")
605
606         self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
607         if self.intermediate_output_ttl and self.work_api != "containers":
608             raise Exception("--intermediate-output-ttl is only supported with --api=containers.")
609         if self.intermediate_output_ttl < 0:
610             raise Exception("Invalid value %d for --intermediate-output-ttl, cannot be less than zero" % self.intermediate_output_ttl)
611
612         if runtimeContext.submit_request_uuid and self.work_api != "containers":
613             raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))
614
615         runtimeContext = runtimeContext.copy()
616
617         default_storage_classes = ",".join([k for k,v in self.api.config().get("StorageClasses", {"default": {"Default": True}}).items() if v.get("Default") is True])
618         if runtimeContext.storage_classes == "default":
619             runtimeContext.storage_classes = default_storage_classes
620         if runtimeContext.intermediate_storage_classes == "default":
621             runtimeContext.intermediate_storage_classes = default_storage_classes
622
623         if not runtimeContext.name:
624             self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])
625             if git_info.get("http://arvados.org/cwl#gitDescribe"):
626                 self.name = "%s (%s)" % (self.name, git_info.get("http://arvados.org/cwl#gitDescribe"))
627             runtimeContext.name = self.name
628
629         if runtimeContext.copy_deps is None and (runtimeContext.create_workflow or runtimeContext.update_workflow):
630             # When creating or updating workflow record, by default
631             # always copy dependencies and ensure Docker images are up
632             # to date.
633             runtimeContext.copy_deps = True
634             runtimeContext.match_local_docker = True
635
636         if runtimeContext.update_workflow and self.project_uuid is None:
637             # If we are updating a workflow, make sure anything that
638             # gets uploaded goes into the same parent project, unless
639             # an alternate --project-uuid was provided.
640             existing_wf = self.api.workflows().get(uuid=runtimeContext.update_workflow).execute()
641             runtimeContext.project_uuid = existing_wf["owner_uuid"]
642
643         self.project_uuid = runtimeContext.project_uuid
644
645         # Upload local file references in the job order.
646         with Perf(metrics, "upload_job_order"):
647             job_order = upload_job_order(self, "%s input" % runtimeContext.name,
648                                          updated_tool, job_order, runtimeContext)
649
650         # the last clause means: if it is a command line tool, and we
651         # are going to wait for the result, and always_submit_runner
652         # is false, then we don't submit a runner process.
653
654         submitting = (runtimeContext.update_workflow or
655                       runtimeContext.create_workflow or
656                       (runtimeContext.submit and not
657                        (updated_tool.tool["class"] == "CommandLineTool" and
658                         runtimeContext.wait and
659                         not runtimeContext.always_submit_runner)))
660
661         loadingContext = self.loadingContext.copy()
662         loadingContext.do_validate = False
663         loadingContext.disable_js_validation = True
664         if submitting:
665             loadingContext.do_update = False
666             # Document may have been auto-updated. Reload the original
667             # document with updating disabled because we want to
668             # submit the document with its original CWL version, not
669             # the auto-updated one.
670             with Perf(metrics, "load_tool original"):
671                 tool = load_tool(updated_tool.tool["id"], loadingContext)
672         else:
673             tool = updated_tool
674
675         # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
676         # Also uploads docker images.
677         logger.info("Uploading workflow dependencies")
678         with Perf(metrics, "upload_workflow_deps"):
679             merged_map = upload_workflow_deps(self, tool, runtimeContext)
680
681         # Recreate process object (ArvadosWorkflow or
682         # ArvadosCommandTool) because tool document may have been
683         # updated by upload_workflow_deps in ways that modify
684         # inheritance of hints or requirements.
685         loadingContext.loader = tool.doc_loader
686         loadingContext.avsc_names = tool.doc_schema
687         loadingContext.metadata = tool.metadata
688         with Perf(metrics, "load_tool"):
689             tool = load_tool(tool.tool, loadingContext)
690
691         if runtimeContext.update_workflow or runtimeContext.create_workflow:
692             # Create a pipeline template or workflow record and exit.
693             if self.work_api == "containers":
694                 uuid = upload_workflow(self, tool, job_order,
695                                        runtimeContext.project_uuid,
696                                        runtimeContext,
697                                        uuid=runtimeContext.update_workflow,
698                                        submit_runner_ram=runtimeContext.submit_runner_ram,
699                                        name=runtimeContext.name,
700                                        merged_map=merged_map,
701                                        submit_runner_image=runtimeContext.submit_runner_image,
702                                        git_info=git_info)
703                 self.stdout.write(uuid + "\n")
704                 return (None, "success")
705
706         self.apply_reqs(job_order, tool)
707
708         self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
709         self.eval_timeout = runtimeContext.eval_timeout
710
711         runtimeContext.use_container = True
712         runtimeContext.tmpdir_prefix = "tmp"
713         runtimeContext.work_api = self.work_api
714
715         if not self.output_name:
716              self.output_name = "Output from workflow %s" % runtimeContext.name
717
718         self.output_name  = cleanup_name_for_collection(self.output_name)
719
720         if self.work_api == "containers":
721             if self.ignore_docker_for_reuse:
722                 raise Exception("--ignore-docker-for-reuse not supported with containers API.")
723             runtimeContext.outdir = "/var/spool/cwl"
724             runtimeContext.docker_outdir = "/var/spool/cwl"
725             runtimeContext.tmpdir = "/tmp"
726             runtimeContext.docker_tmpdir = "/tmp"
727
728         if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
729             raise Exception("--priority must be in the range 1..1000.")
730
731         if self.should_estimate_cache_size:
732             visited = set()
733             estimated_size = [0]
734             def estimate_collection_cache(obj):  # visit_class callback for File/Directory objects: adds to estimated_size[0] once per distinct keep: reference
735                 if obj.get("location", "").startswith("keep:"):  # a missing "location" defaults to "" and is skipped; only keep: references are counted
736                     m = pdh_size.match(obj["location"][5:])  # [5:] drops the leading "keep:"; pdh_size is imported from .fsaccess
737                     if m and m.group(1) not in visited:  # ignore non-matching locations and anything already counted (presumably group(1) is the portable data hash -- confirm in .fsaccess)
738                         visited.add(m.group(1))  # remember it in the enclosing scope's set so repeated references are not double counted
739                         estimated_size[0] += int(m.group(2))  # running total lives in a one-element list so this closure can update it in place (presumably group(2) is the size field -- confirm in .fsaccess)
740             visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
741             runtimeContext.collection_cache_size = max(((estimated_size[0]*192) // (1024*1024))+1, 256)
742             self.collection_cache.set_cap(runtimeContext.collection_cache_size*1024*1024)
743
744         logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)
745
746         runnerjob = None
747         if runtimeContext.submit:
748             # Submit a runner job to run the workflow for us.
749             if self.work_api == "containers":
750                 if submitting:
751                     tool = RunnerContainer(self, updated_tool,
752                                            tool, loadingContext, runtimeContext.enable_reuse,
753                                            self.output_name,
754                                            self.output_tags,
755                                            submit_runner_ram=runtimeContext.submit_runner_ram,
756                                            name=runtimeContext.name,
757                                            on_error=runtimeContext.on_error,
758                                            submit_runner_image=runtimeContext.submit_runner_image,
759                                            intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
760                                            merged_map=merged_map,
761                                            priority=runtimeContext.priority,
762                                            secret_store=self.secret_store,
763                                            collection_cache_size=runtimeContext.collection_cache_size,
764                                            collection_cache_is_default=self.should_estimate_cache_size,
765                                            git_info=git_info)
766                 else:
767                     runtimeContext.runnerjob = tool.tool["id"]
768
769         if runtimeContext.cwl_runner_job is not None:
770             self.uuid = runtimeContext.cwl_runner_job.get('uuid')
771
772         jobiter = tool.job(job_order,
773                            self.output_callback,
774                            runtimeContext)
775
776         if runtimeContext.submit and not runtimeContext.wait:
777             runnerjob = next(jobiter)
778             runnerjob.run(runtimeContext)
779             self.stdout.write(runnerjob.uuid+"\n")
780             return (None, "success")
781
782         current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
783         if current_container:
784             logger.info("Running inside container %s", current_container.get("uuid"))
785             self.set_container_request_properties(current_container, git_info)
786
787         self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
788         self.polling_thread = threading.Thread(target=self.poll_states)
789         self.polling_thread.start()
790
791         self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)
792
793         try:
794             self.workflow_eval_lock.acquire()
795
796             # Holds the lock while this code runs and releases it when
797             # it is safe to do so in self.workflow_eval_lock.wait(),
798             # at which point on_message can update job state and
799             # process output callbacks.
800
801             loopperf = Perf(metrics, "jobiter")
802             loopperf.__enter__()
803             for runnable in jobiter:
804                 loopperf.__exit__()
805
806                 if self.stop_polling.is_set():
807                     break
808
809                 if self.task_queue.error is not None:
810                     raise self.task_queue.error
811
812                 if runnable:
813                     with Perf(metrics, "run"):
814                         self.start_run(runnable, runtimeContext)
815                 else:
816                     if (self.task_queue.in_flight + len(self.processes)) > 0:
817                         self.workflow_eval_lock.wait(3)
818                     else:
819                         logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
820                         break
821
822                 if self.stop_polling.is_set():
823                     break
824
825                 loopperf.__enter__()
826             loopperf.__exit__()
827
828             while (self.task_queue.in_flight + len(self.processes)) > 0:
829                 if self.task_queue.error is not None:
830                     raise self.task_queue.error
831                 self.workflow_eval_lock.wait(3)
832
833         except UnsupportedRequirement:
834             raise
835         except:
836             if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
837                 logger.error("Interrupted, workflow will be cancelled")
838             elif isinstance(sys.exc_info()[1], WorkflowException):
839                 logger.error("Workflow execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
840             else:
841                 logger.exception("Workflow execution failed")
842
843             if self.pipeline:
844                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
845                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
846
847             if self.work_api == "containers" and not current_container:
848                 # Not running in a crunch container, so cancel any outstanding processes.
849                 for p in self.processes:
850                     try:
851                         self.api.container_requests().update(uuid=p,
852                                                              body={"priority": "0"}
853                         ).execute(num_retries=self.num_retries)
854                     except Exception:
855                         pass
856         finally:
857             self.workflow_eval_lock.release()
858             self.task_queue.drain()
859             self.stop_polling.set()
860             self.polling_thread.join()
861             self.task_queue.join()
862
863         if self.final_status == "UnsupportedRequirement":
864             raise UnsupportedRequirement("Check log for details.")
865
866         if self.final_output is None:
867             raise WorkflowException("Workflow did not return a result.")
868
869         if runtimeContext.submit and isinstance(tool, Runner):
870             logger.info("Final output collection %s", tool.final_output)
871             if workbench2 or workbench1:
872                 logger.info("Output at %scollections/%s", workbench2 or workbench1, tool.final_output)
873         else:
874             if self.output_tags is None:
875                 self.output_tags = ""
876
877             storage_classes = ""
878             storage_class_req, _ = tool.get_requirement("http://arvados.org/cwl#OutputStorageClass")
879             if storage_class_req and storage_class_req.get("finalStorageClass"):
880                 storage_classes = aslist(storage_class_req["finalStorageClass"])
881             else:
882                 storage_classes = runtimeContext.storage_classes.strip().split(",")
883
884             output_properties = {}
885             output_properties_req, _ = tool.get_requirement("http://arvados.org/cwl#OutputCollectionProperties")
886             if output_properties_req:
887                 builder = make_builder(job_order, tool.hints, tool.requirements, runtimeContext, tool.metadata)
888                 for pr in output_properties_req["outputProperties"]:
889                     output_properties[pr["propertyName"]] = builder.do_eval(pr["propertyValue"])
890
891             self.final_output, self.final_output_collection = self.make_output_collection(self.output_name, storage_classes,
892                                                                                           self.output_tags, output_properties,
893                                                                                           self.final_output)
894             self.set_crunch_output()
895
896         if runtimeContext.compute_checksum:
897             adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
898             adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
899
900         if self.trash_intermediate and self.final_status == "success":
901             self.trash_intermediate_output()
902
903         return (self.final_output, self.final_status)