2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: Apache-2.0
6 # Implement cwl-runner interface for submitting and running work on Arvados, using
7 # the Crunch containers API.
9 from future.utils import viewitems
10 from builtins import str
17 import pkg_resources # part of setuptools
19 from schema_salad.sourceline import SourceLine
20 import schema_salad.validate as validate
22 import cwltool.workflow
23 import cwltool.process
24 import cwltool.argparser
25 from cwltool.errors import WorkflowException
26 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
27 from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
31 from arvados.keep import KeepClient
32 from arvados.errors import ApiError
33 import arvados.commands._util as arv_cmd
34 from arvados.api import OrderedJsonModel
36 from .perf import Perf
37 from ._version import __version__
38 from .executor import ArvCwlExecutor
40 # These aren't used directly in this file but
41 # other code expects to import them from here
42 from .arvcontainer import ArvadosContainer
43 from .arvtool import ArvadosCommandTool
44 from .fsaccess import CollectionFsAccess, CollectionCache, CollectionFetcher
45 from .util import get_current_container
46 from .executor import RuntimeStatusLoggingHandler, DEFAULT_PRIORITY
47 from .arvworkflow import ArvadosWorkflow
# Module-level loggers: `logger` carries the runner's general messages and
# `metrics` is the separate 'arvados.cwl-runner.metrics' channel.
49 logger = logging.getLogger('arvados.cwl-runner')
50 metrics = logging.getLogger('arvados.cwl-runner.metrics')
# The runner logger starts out at INFO.
51 logger.setLevel(logging.INFO)
# Install a message format on arvados.log_handler that leads with a timestamp,
# followed by logger name and level.
# NOTE(review): the remaining argument(s) and the closing parentheses of this
# call are not visible in this view -- confirm against the full file.
53 arvados.log_handler.setFormatter(logging.Formatter(
54 '%(asctime)s %(name)s %(levelname)s: %(message)s',
# NOTE(review): the `def` line that owns this body is not visible in this view.
# The --version option in arg_parser() calls `versionstring()` and main()
# passes `versionstring` as versionfunc, so this is presumably that function
# -- confirm against the full file.
# Despite the docstring's wording ("Print"), the visible code builds and
# returns the string; it does not print it.
58 """Print version string of key packages for provenance and debugging."""
# Look up the installed distributions of the three packages; the first entry
# of each result supplies the version reported below.
60 arvcwlpkg = pkg_resources.require("arvados-cwl-runner")
61 arvpkg = pkg_resources.require("arvados-python-client")
62 cwlpkg = pkg_resources.require("cwltool")
# Result format: "<argv[0]> <runner version>, arvados-python-client <version>, cwltool <version>".
64 return "%s %s, %s %s, %s %s" % (sys.argv[0], arvcwlpkg[0].version,
65 "arvados-python-client", arvpkg[0].version,
66 "cwltool", cwlpkg[0].version)
# Build the command line parser for the Arvados CWL runner.
#
# Creates an argparse.ArgumentParser and registers the runner's options plus
# the positional `workflow` and `job_order` arguments.
#
# NOTE(review): several add_argument() calls below are missing their trailing
# keyword arguments / closing parenthesis in this view, and no `return`
# statement is visible even though the type comment promises an
# ArgumentParser -- confirm against the full file before relying on defaults.
69 def arg_parser(): # type: () -> argparse.ArgumentParser
70 parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
# Where relative input references are resolved and where outputs are written.
# --outdir defaults to the absolute path of the current directory.
72 parser.add_argument("--basedir",
73 help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
74 parser.add_argument("--outdir", default=os.path.abspath('.'),
75 help="Output directory, default current directory")
# NOTE(review): the type/default arguments of --eval-timeout are not visible here.
77 parser.add_argument("--eval-timeout",
78 help="Time to wait for a Javascript expression to evaluate before giving an error, default 20s.",
# Mutually exclusive "inspect instead of run" actions. The --version text is
# computed by calling versionstring() while the parser is being built.
82 exgroup = parser.add_mutually_exclusive_group()
83 exgroup.add_argument("--print-dot", action="store_true",
84 help="Print workflow visualization in graphviz format and exit")
85 exgroup.add_argument("--version", action="version", help="Print version and exit", version=versionstring())
86 exgroup.add_argument("--validate", action="store_true", help="Validate CWL document only.")
# Mutually exclusive logging verbosity flags.
88 exgroup = parser.add_mutually_exclusive_group()
89 exgroup.add_argument("--verbose", action="store_true", help="Default logging")
90 exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
91 exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
93 parser.add_argument("--metrics", action="store_true", help="Print timing metrics")
95 parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
# Container reuse toggle: both flags write to `enable_reuse`, which defaults to True.
97 exgroup = parser.add_mutually_exclusive_group()
98 exgroup.add_argument("--enable-reuse", action="store_true",
99 default=True, dest="enable_reuse",
100 help="Enable container reuse (default)")
101 exgroup.add_argument("--disable-reuse", action="store_false",
102 default=True, dest="enable_reuse",
103 help="Disable container reuse")
# Ownership, naming and tagging of the run and its final output collection.
105 parser.add_argument("--project-uuid", metavar="UUID", help="Project that will own the workflow containers, if not provided, will go to home project.")
106 parser.add_argument("--output-name", help="Name to use for collection that stores the final output.", default=None)
107 parser.add_argument("--output-tags", help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
# NOTE(review): the default/dest arguments of --ignore-docker-for-reuse are not visible here.
108 parser.add_argument("--ignore-docker-for-reuse", action="store_true",
109 help="Ignore Docker image version when deciding whether to reuse past containers.",
# Mutually exclusive run modes. --submit and --local share dest `submit`
# (default True); --create-template is a deprecated alias stored under
# `create_workflow`; --update-workflow takes the UUID of the workflow to update.
112 exgroup = parser.add_mutually_exclusive_group()
113 exgroup.add_argument("--submit", action="store_true", help="Submit workflow to run on Arvados.",
114 default=True, dest="submit")
115 exgroup.add_argument("--local", action="store_false", help="Run workflow on local host (submits containers to Arvados).",
116 default=True, dest="submit")
117 exgroup.add_argument("--create-template", action="store_true", help="(Deprecated) synonym for --create-workflow.",
118 dest="create_workflow")
119 exgroup.add_argument("--create-workflow", action="store_true", help="Register an Arvados workflow that can be run from Workbench")
120 exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow with the given UUID.")
# --wait / --no-wait share dest `wait`, default True.
122 exgroup = parser.add_mutually_exclusive_group()
123 exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner, wait for completion.",
124 default=True, dest="wait")
125 exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner and exit.",
126 default=True, dest="wait")
# --log-timestamps / --no-log-timestamps share dest `log_timestamps`, default True.
128 exgroup = parser.add_mutually_exclusive_group()
129 exgroup.add_argument("--log-timestamps", action="store_true", help="Prefix logging lines with timestamp",
130 default=True, dest="log_timestamps")
131 exgroup.add_argument("--no-log-timestamps", action="store_false", help="No timestamp on logging lines",
132 default=True, dest="log_timestamps")
# Work submission API, stored as `work_api`; "containers" is the only accepted
# choice and the default is None.
134 parser.add_argument("--api",
135 default=None, dest="work_api",
136 choices=("containers",),
137 help="Select work submission API. Only supports 'containers'")
139 parser.add_argument("--compute-checksum", action="store_true", default=False,
140 help="Compute checksum of contents while collecting outputs",
141 dest="compute_checksum")
# Options describing the workflow runner job itself.
# NOTE(review): the closing arguments of the next three calls are not visible here.
143 parser.add_argument("--submit-runner-ram", type=int,
144 help="RAM (in MiB) required for the workflow runner job (default 1024)",
# The help text embeds this package's __version__ as the default image tag.
147 parser.add_argument("--submit-runner-image",
148 help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
151 parser.add_argument("--always-submit-runner", action="store_true",
152 help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool",
# --submit-request-uuid and --submit-runner-cluster cannot be combined.
# NOTE(review): parts of both calls are not visible in this view.
155 exgroup = parser.add_mutually_exclusive_group()
156 exgroup.add_argument("--submit-request-uuid",
158 help="Update and commit to supplied container request instead of creating a new one.",
160 exgroup.add_argument("--submit-runner-cluster",
161 help="Submit workflow runner to a remote cluster",
163 metavar="CLUSTER_ID")
# NOTE(review): the default of --collection-cache-size is not visible here
# (the help text says 256 MiB).
165 parser.add_argument("--collection-cache-size", type=int,
167 help="Collection cache size (in MiB, default 256).")
169 parser.add_argument("--name",
170 help="Name to use for workflow execution instance.",
# Failure policy: restricted to "stop" or "continue", defaulting to "continue".
173 parser.add_argument("--on-error",
174 help="Desired workflow behavior when a step fails. One of 'stop' (do not submit any more steps) or "
175 "'continue' (may submit other steps that are not downstream from the error). Default is 'continue'.",
176 default="continue", choices=("stop", "continue"))
178 parser.add_argument("--enable-dev", action="store_true",
179 help="Enable loading and running development versions "
180 "of the CWL standards.", default=False)
# Storage classes are given as comma separated strings; both default to "default".
181 parser.add_argument('--storage-classes', default="default",
182 help="Specify comma separated list of storage classes to be used when saving final workflow output to Keep.")
183 parser.add_argument('--intermediate-storage-classes', default="default",
184 help="Specify comma separated list of storage classes to be used when saving intermediate workflow output to Keep.")
# NOTE(review): the default of --intermediate-output-ttl is not visible here
# (the help text says 0).
186 parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
187 help="If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).",
# Priority defaults to DEFAULT_PRIORITY, which this module imports from .executor.
190 parser.add_argument("--priority", type=int,
191 help="Workflow priority (range 1..1000, higher has precedence over lower)",
192 default=DEFAULT_PRIORITY)
# Hidden options: help=argparse.SUPPRESS keeps them out of the --help output.
# --disable-validate clears `do_validate` and --disable-color clears
# `enable_color`; both of those default to True.
194 parser.add_argument("--disable-validate", dest="do_validate",
195 action="store_false", default=True,
196 help=argparse.SUPPRESS)
198 parser.add_argument("--disable-color", dest="enable_color",
199 action="store_false", default=True,
200 help=argparse.SUPPRESS)
202 parser.add_argument("--disable-js-validation",
203 action="store_true", default=False,
204 help=argparse.SUPPRESS)
206 parser.add_argument("--thread-count", type=int,
207 default=0, help="Number of threads to use for job submit and output collection.")
# API request timeout in seconds; 5*60 = 300.
209 parser.add_argument("--http-timeout", type=int,
210 default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
# NOTE(review): the add_argument( opener, option name and remaining arguments
# for this help text are not visible in this view.
215 help="Skip loading of schemas",
# --trash-intermediate / --no-trash-intermediate share dest
# `trash_intermediate`, default False.
220 exgroup = parser.add_mutually_exclusive_group()
221 exgroup.add_argument("--trash-intermediate", action="store_true",
222 default=False, dest="trash_intermediate",
223 help="Immediately trash intermediate outputs on workflow success.")
224 exgroup.add_argument("--no-trash-intermediate", action="store_false",
225 default=False, dest="trash_intermediate",
226 help="Do not trash intermediate outputs (default).")
# Positionals: the workflow document, then everything that follows it
# (argparse.REMAINDER) as the job order / input object.
228 parser.add_argument("workflow", default=None, help="The workflow to execute")
229 parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
# NOTE(review): the `def` line that owns these statements is not visible in
# this view. main() passes `add_arv_hints` to cwltool as
# custom_schema_callback, so this is presumably that function's body --
# confirm against the full file.
# Replace both of cwltool's ACCEPTLIST patterns with the permissive regex ".*".
234 cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")
235 cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE
# For every supported CWL version, read the packaged
# 'arv-cwl-schema-<version>.yml' resource as UTF-8 text and hand it to
# use_custom_schema() together with the "http://arvados.org/cwl" identifier.
# NOTE(review): no close() of the resource stream is visible in this view.
236 supported_versions = ["v1.0", "v1.1", "v1.2"]
237 for s in supported_versions:
238 res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema-%s.yml' % s)
239 customschema = res.read().decode('utf-8')
240 use_custom_schema(s, "http://arvados.org/cwl", customschema)
# Append these extension URIs to cwltool's list of supported process requirements.
# NOTE(review): the closing "])" of this call is not visible in this view.
242 cwltool.process.supportedProcessRequirements.extend([
243 "http://arvados.org/cwl#RunInSingleContainer",
244 "http://arvados.org/cwl#OutputDirType",
245 "http://arvados.org/cwl#RuntimeConstraints",
246 "http://arvados.org/cwl#PartitionRequirement",
247 "http://arvados.org/cwl#APIRequirement",
248 "http://commonwl.org/cwltool#LoadListingRequirement",
249 "http://arvados.org/cwl#IntermediateOutput",
250 "http://arvados.org/cwl#ReuseRequirement",
251 "http://arvados.org/cwl#ClusterTarget",
252 "http://arvados.org/cwl#OutputStorageClass",
253 "http://arvados.org/cwl#ProcessProperties"
def exit_signal_handler(sigcode, frame):
    """Report a caught signal on the runner's error log.

    sigcode -- the signal number; it is interpolated into the log message.
    frame -- accepted but not used by the visible code.
    """
    # NOTE(review): the message announces an exit, but no exit call is
    # visible in this view -- confirm against the full file.
    message = str(u"Caught signal {}, exiting.").format(sigcode)
    logger.error(message)
# Command line entry point: parse `args`, set up the Arvados API/Keep clients
# and an ArvCwlExecutor, configure logging, then hand control to
# cwltool.main.main and return its result.
#
# Parameters:
#   args                 -- argument list given to the parser built by arg_parser()
#   stdout               -- output stream, passed on to ArvCwlExecutor
#   stderr               -- NOTE(review): not referenced in the lines visible here
#   api_client           -- optional pre-built API client; one is created when None
#   keep_client          -- optional pre-built Keep client
#   install_sig_handlers -- when true, arv_cmd.install_signal_handlers() is called
#
# NOTE(review): this view has lost indentation and several lines (`try:`,
# `else:` branches, some `if` headers and early returns), so the exact nesting
# of the statements below must be confirmed against the full file.
260 def main(args, stdout, stderr, api_client=None, keep_client=None,
261 install_sig_handlers=True):
262 parser = arg_parser()
264 job_order_object = None
265 arvargs = parser.parse_args(args)
# Settings forced onto the parsed namespace regardless of the command line.
267 arvargs.use_container = True
268 arvargs.relax_path_checks = True
269 arvargs.print_supported_versions = False
271 if install_sig_handlers:
272 arv_cmd.install_signal_handlers()
# When updating a workflow, the API is derived from the UUID: '-7fd4e-'
# starting at index 5 selects 'containers'.
# NOTE(review): the handling of UUIDs that do not match is not visible here.
274 if arvargs.update_workflow:
275 if arvargs.update_workflow.find('-7fd4e-') == 5:
276 want_api = 'containers'
# An explicit --api that disagrees with the UUID-derived API is reported as an error.
279 if want_api and arvargs.work_api and want_api != arvargs.work_api:
280 logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
281 arvargs.update_workflow, want_api, arvargs.work_api))
283 arvargs.work_api = want_api
# Creating or updating a workflow without a job order uses an empty input object.
285 if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
286 job_order_object = ({}, "")
# Fill in any cwltool default argument that the parser did not already set.
290 for key, val in viewitems(cwltool.argparser.get_default_args()):
291 if not hasattr(arvargs, key):
292 setattr(arvargs, key, val)
# Without a caller-supplied API client, build a ThreadSafeApiCache that uses
# the --http-timeout value, and take its Keep client.
# NOTE(review): the `try:` that pairs with the `except` below is not visible
# in this view.
295 if api_client is None:
296 api_client = arvados.safeapi.ThreadSafeApiCache(
297 api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
298 keep_params={"num_retries": 4})
299 keep_client = api_client.keep
300 # Make an API object now so errors are reported early.
301 api_client.users().current().execute()
302 if keep_client is None:
303 keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
304 executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4, stdout=stdout)
# A WorkflowException is logged with exception info only when --debug is set.
# NOTE(review): the handler header for the logger.exception() call and any
# early returns are not visible in this view.
305 except WorkflowException as e:
306 logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False))
309 logger.exception("Error creating the Arvados CWL Executor")
312 # Note that unless in debug mode, some stack traces related to user
313 # workflow errors may be suppressed.
# Logging levels: a DEBUG set, a WARN set and a metrics set follow.
# NOTE(review): the conditions guarding each set are not visible in this view;
# presumably --debug, --quiet and --metrics -- confirm.
315 logger.setLevel(logging.DEBUG)
316 logging.getLogger('arvados').setLevel(logging.DEBUG)
319 logger.setLevel(logging.WARN)
320 logging.getLogger('arvados').setLevel(logging.WARN)
321 logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
324 metrics.setLevel(logging.DEBUG)
325 logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)
# With log_timestamps set, use the timestamped format; the other formatter
# below omits %(asctime)s.
# NOTE(review): the `else:` separating the two is not visible in this view.
327 if arvargs.log_timestamps:
328 arvados.log_handler.setFormatter(logging.Formatter(
329 '%(asctime)s %(name)s %(levelname)s: %(message)s',
330 '%Y-%m-%d %H:%M:%S'))
332 arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
334 if stdout is sys.stdout:
335 # cwltool.main has code to work around encoding issues with
336 # sys.stdout and unix pipes (they default to ASCII encoding,
337 # we want utf-8), so when stdout is sys.stdout set it to None
338 # to take advantage of that. Don't override it for all cases
339 # since we still want to be able to capture stdout for the
# Delegate the run to cwltool, supplying the Arvados executor, contexts and
# schema callback. An input object is required unless a workflow is being
# created or updated.
# NOTE(review): some keyword arguments of this call are not visible in this view.
343 return cwltool.main.main(args=arvargs,
346 executor=executor.arv_executor,
347 versionfunc=versionstring,
348 job_order_object=job_order_object,
349 logger_handler=arvados.log_handler,
350 custom_schema_callback=add_arv_hints,
351 loadingContext=executor.loadingContext,
352 runtimeContext=executor.runtimeContext,
353 input_required=not (arvargs.create_workflow or arvargs.update_workflow))