2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: Apache-2.0
6 # Implement cwl-runner interface for submitting and running work on Arvados, using
7 # either the Crunch jobs API or Crunch containers API.
14 import pkg_resources # part of setuptools
17 import cwltool.workflow
18 import cwltool.process
19 from schema_salad.sourceline import SourceLine
20 import schema_salad.validate as validate
21 import cwltool.argparser
25 from arvados.keep import KeepClient
26 from arvados.errors import ApiError
27 import arvados.commands._util as arv_cmd
29 from .perf import Perf
30 from ._version import __version__
31 from .executor import ArvCwlExecutor
33 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
34 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
36 from arvados.api import OrderedJsonModel
# Module-wide loggers: one for the runner itself, one for timing metrics.
logger = logging.getLogger('arvados.cwl-runner')
metrics = logging.getLogger('arvados.cwl-runner.metrics')
logger.setLevel(logging.INFO)

# Install a timestamped log format on the shared Arvados log handler.
# The date format is the same one main() passes when it installs the
# timestamped formatter again for --log-timestamps.
arvados.log_handler.setFormatter(logging.Formatter(
    '%(asctime)s %(name)s %(levelname)s: %(message)s',
    '%Y-%m-%d %H:%M:%S'))

# Default value of the --priority command-line option.
DEFAULT_PRIORITY = 500
def versionstring():
    """Return a version string of key packages for provenance and debugging.

    The string has the form
    ``"<argv0> <runner version>, arvados-python-client <version>, cwltool <version>"``
    where ``<argv0>`` is ``sys.argv[0]`` and each version is taken from the
    first distribution returned by ``pkg_resources.require`` for
    ``arvados-cwl-runner``, ``arvados-python-client`` and ``cwltool``.

    Any exception raised by ``pkg_resources.require`` (for example when one
    of the distributions is not installed) propagates to the caller.
    """
    arvcwlpkg = pkg_resources.require("arvados-cwl-runner")
    arvpkg = pkg_resources.require("arvados-python-client")
    cwlpkg = pkg_resources.require("cwltool")

    return "%s %s, %s %s, %s %s" % (sys.argv[0], arvcwlpkg[0].version,
                                    "arvados-python-client", arvpkg[0].version,
                                    "cwltool", cwlpkg[0].version)
# Build the argparse parser for the Arvados CWL runner command line.
# The trailing type comment declares the return type as argparse.ArgumentParser.
# NOTE(review): in this view every line carries a stray leading number,
# indentation is absent, several add_argument(...) calls below are not closed,
# and no `return parser` line is visible even though main() calls
# parse_args() on this function's result.  Confirm against the full file
# before relying on any of the flagged spots.
60 def arg_parser(): # type: () -> argparse.ArgumentParser
61 parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
# Locations: base directory for resolving relative input references, and the
# output directory (defaults to the absolute path of the current directory,
# computed when the parser is built).
63 parser.add_argument("--basedir", type=str,
64 help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
65 parser.add_argument("--outdir", type=str, default=os.path.abspath('.'),
66 help="Output directory, default current directory")
# NOTE(review): this call is not closed in the visible text.  The help string
# mentions a 20s default, but the argument's type and default are not shown.
68 parser.add_argument("--eval-timeout",
69 help="Time to wait for a Javascript expression to evaluate before giving an error, default 20s.",
# Mutually exclusive: --print-dot, --version, --validate.
# versionstring() is called here, i.e. while the parser is being constructed,
# not only when --version is actually passed.
73 exgroup = parser.add_mutually_exclusive_group()
74 exgroup.add_argument("--print-dot", action="store_true",
75 help="Print workflow visualization in graphviz format and exit")
76 exgroup.add_argument("--version", action="version", help="Print version and exit", version=versionstring())
77 exgroup.add_argument("--validate", action="store_true", help="Validate CWL document only.")
# Mutually exclusive logging verbosity flags.
79 exgroup = parser.add_mutually_exclusive_group()
80 exgroup.add_argument("--verbose", action="store_true", help="Default logging")
81 exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
82 exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
84 parser.add_argument("--metrics", action="store_true", help="Print timing metrics")
86 parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
# --enable-reuse / --disable-reuse share dest `enable_reuse`; it is True
# unless --disable-reuse is given.
88 exgroup = parser.add_mutually_exclusive_group()
89 exgroup.add_argument("--enable-reuse", action="store_true",
90 default=True, dest="enable_reuse",
91 help="Enable job or container reuse (default)")
92 exgroup.add_argument("--disable-reuse", action="store_false",
93 default=True, dest="enable_reuse",
94 help="Disable job or container reuse")
# Ownership and naming/tagging of the final output collection.
96 parser.add_argument("--project-uuid", type=str, metavar="UUID", help="Project that will own the workflow jobs, if not provided, will go to home project.")
97 parser.add_argument("--output-name", type=str, help="Name to use for collection that stores the final output.", default=None)
98 parser.add_argument("--output-tags", type=str, help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
# NOTE(review): this call is not closed in the visible text.
99 parser.add_argument("--ignore-docker-for-reuse", action="store_true",
100 help="Ignore Docker image version when deciding whether to reuse past jobs.",
# Mutually exclusive run modes.  --submit and --local share dest `submit`
# (True unless --local is given).  --create-template writes to
# `create_workflow`, making it a deprecated synonym for --create-workflow.
103 exgroup = parser.add_mutually_exclusive_group()
104 exgroup.add_argument("--submit", action="store_true", help="Submit workflow to run on Arvados.",
105 default=True, dest="submit")
106 exgroup.add_argument("--local", action="store_false", help="Run workflow on local host (submits jobs to Arvados).",
107 default=True, dest="submit")
108 exgroup.add_argument("--create-template", action="store_true", help="(Deprecated) synonym for --create-workflow.",
109 dest="create_workflow")
110 exgroup.add_argument("--create-workflow", action="store_true", help="Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.")
111 exgroup.add_argument("--update-workflow", type=str, metavar="UUID", help="Update an existing Arvados workflow or pipeline template with the given UUID.")
# --wait / --no-wait share dest `wait`; True unless --no-wait is given.
113 exgroup = parser.add_mutually_exclusive_group()
114 exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner job, wait for completion.",
115 default=True, dest="wait")
116 exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner job and exit.",
117 default=True, dest="wait")
# --log-timestamps / --no-log-timestamps share dest `log_timestamps`;
# True unless --no-log-timestamps is given.
119 exgroup = parser.add_mutually_exclusive_group()
120 exgroup.add_argument("--log-timestamps", action="store_true", help="Prefix logging lines with timestamp",
121 default=True, dest="log_timestamps")
122 exgroup.add_argument("--no-log-timestamps", action="store_false", help="No timestamp on logging lines",
123 default=True, dest="log_timestamps")
# --api is stored as `work_api`; it stays None when the option is not given,
# and only "jobs" or "containers" are accepted.
125 parser.add_argument("--api", type=str,
126 default=None, dest="work_api",
127 choices=("jobs", "containers"),
128 help="Select work submission API. Default is 'jobs' if that API is available, otherwise 'containers'.")
130 parser.add_argument("--compute-checksum", action="store_true", default=False,
131 help="Compute checksum of contents while collecting outputs",
132 dest="compute_checksum")
# NOTE(review): this call is not closed in the visible text.  The help string
# mentions a default of 1024, but no default argument is shown here.
134 parser.add_argument("--submit-runner-ram", type=int,
135 help="RAM (in MiB) required for the workflow runner job (default 1024)",
# NOTE(review): this call is not closed in the visible text.  The help string
# is built from __version__ when the parser is constructed.
138 parser.add_argument("--submit-runner-image", type=str,
139 help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
# Mutually exclusive: --submit-request-uuid and --submit-runner-cluster.
# NOTE(review): neither of these two calls is closed in the visible text.
142 exgroup = parser.add_mutually_exclusive_group()
143 exgroup.add_argument("--submit-request-uuid", type=str,
145 help="Update and commit to supplied container request instead of creating a new one (containers API only).")
146 exgroup.add_argument("--submit-runner-cluster", type=str,
147 help="Submit toplevel runner to a remote cluster (containers API only)",
# NOTE(review): this call is not closed in the visible text.
150 parser.add_argument("--name", type=str,
151 help="Name to use for workflow execution instance.",
# --on-error accepts only "stop" or "continue" and defaults to "continue".
154 parser.add_argument("--on-error", type=str,
155 help="Desired workflow behavior when a step fails. One of 'stop' or 'continue'. "
156 "Default is 'continue'.", default="continue", choices=("stop", "continue"))
158 parser.add_argument("--enable-dev", action="store_true",
159 help="Enable loading and running development versions "
160 "of CWL spec.", default=False)
# Stored as one comma-separated string; main() rejects values that contain
# more than one entry.
161 parser.add_argument('--storage-classes', default="default", type=str,
162 help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
# NOTE(review): this call is not closed in the visible text.  The help string
# mentions a default of 0, but no default argument is shown here.
164 parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
165 help="If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).",
168 parser.add_argument("--priority", type=int,
169 help="Workflow priority (range 1..1000, higher has precedence over lower, containers api only)",
170 default=DEFAULT_PRIORITY)
# Hidden options (help suppressed): --disable-validate clears `do_validate`
# (True by default); --disable-js-validation is a store_true flag.
172 parser.add_argument("--disable-validate", dest="do_validate",
173 action="store_false", default=True,
174 help=argparse.SUPPRESS)
176 parser.add_argument("--disable-js-validation",
177 action="store_true", default=False,
178 help=argparse.SUPPRESS)
180 parser.add_argument("--thread-count", type=int,
181 default=4, help="Number of threads to use for job submit and output collection.")
# Default timeout is written as 5*60 seconds; main() passes it to the API
# client it creates.
183 parser.add_argument("--http-timeout", type=int,
184 default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
# --trash-intermediate / --no-trash-intermediate share dest
# `trash_intermediate`; False unless --trash-intermediate is given.
186 exgroup = parser.add_mutually_exclusive_group()
187 exgroup.add_argument("--trash-intermediate", action="store_true",
188 default=False, dest="trash_intermediate",
189 help="Immediately trash intermediate outputs on workflow success.")
190 exgroup.add_argument("--no-trash-intermediate", action="store_false",
191 default=False, dest="trash_intermediate",
192 help="Do not trash intermediate outputs (default).")
# Positionals: the workflow, then everything left on the command line is
# collected into `job_order` (argparse.REMAINDER).
194 parser.add_argument("workflow", type=str, default=None, help="The workflow to execute")
195 parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
# NOTE(review): no `def` header is visible for these statements in this view.
# Presumably they form the body of add_arv_hints, the name main() passes as
# custom_schema_callback — confirm against the full file.
# Replace both of cwltool's command-line-tool accept-list regexes with a
# pattern that matches any string.
200 cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")
201 cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE
# Read the packaged 'arv-cwl-schema.yml' resource and register its contents
# as a custom schema for "v1.0" under the http://arvados.org/cwl name.
202 res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema.yml')
203 use_custom_schema("v1.0", "http://arvados.org/cwl", res.read())
# Append the extension requirement URIs below to cwltool's list of supported
# process requirements.
# NOTE(review): the closing of this list/call is not visible in this view, and
# each execution of these lines extends the list again.
205 cwltool.process.supportedProcessRequirements.extend([
206 "http://arvados.org/cwl#RunInSingleContainer",
207 "http://arvados.org/cwl#OutputDirType",
208 "http://arvados.org/cwl#RuntimeConstraints",
209 "http://arvados.org/cwl#PartitionRequirement",
210 "http://arvados.org/cwl#APIRequirement",
211 "http://commonwl.org/cwltool#LoadListingRequirement",
212 "http://arvados.org/cwl#IntermediateOutput",
213 "http://arvados.org/cwl#ReuseRequirement",
214 "http://arvados.org/cwl#ClusterTarget"
# Signal handler with the (signal number, stack frame) signature; logs the
# received signal number at error level.  `frame` is not used in the visible
# lines.
# NOTE(review): the message says "exiting", but no exit call is visible in
# this view of the function — confirm against the full file.
217 def exit_signal_handler(sigcode, frame):
218 logger.error("Caught signal {}, exiting.".format(sigcode))
# Command-line entry point: parse `args`, adjust the parsed namespace, build
# an ArvCwlExecutor, configure logging, and delegate to cwltool.main.main,
# whose result is returned.
#
# Parameters:
#   args                 - argument list handed to the parser from arg_parser().
#   stdout, stderr       - not referenced in the lines visible here
#                          (NOTE(review): presumably forwarded to
#                          cwltool.main.main on lines missing from this view).
#   api_client           - Arvados API client; one is created when None.
#   keep_client          - Keep client; one is obtained/created when None.
#   install_sig_handlers - when true, arv_cmd.install_signal_handlers() runs.
#
# NOTE(review): this view has lost indentation and several short lines
# (e.g. `try:`, `else:`, some branch bodies and `if` guards).  The notes
# below mark each spot where something appears to be missing.
221 def main(args, stdout, stderr, api_client=None, keep_client=None,
222 install_sig_handlers=True):
223 parser = arg_parser()
225 job_order_object = None
226 arvargs = parser.parse_args(args)
# Only one storage class is accepted: more than one comma-separated entry is
# reported as an error.
# NOTE(review): whatever follows the error (e.g. an early return) is not
# visible here.
228 if len(arvargs.storage_classes.strip().split(',')) > 1:
229 logger.error("Multiple storage classes are not supported currently.")
# Extra attributes set on the parsed namespace; they are not options of the
# Arvados parser (presumably read by cwltool — confirm).
232 arvargs.use_container = True
233 arvargs.relax_path_checks = True
234 arvargs.print_supported_versions = False
236 if install_sig_handlers:
237 arv_cmd.install_signal_handlers()
# For --update-workflow, the API is derived from the UUID: the infix must
# start at string index 5.  '-7fd4e-' selects the 'containers' API.
# NOTE(review): the body of the '-p5p6p-' branch and any fallback assignment
# of want_api are not visible in this view.
239 if arvargs.update_workflow:
240 if arvargs.update_workflow.find('-7fd4e-') == 5:
241 want_api = 'containers'
242 elif arvargs.update_workflow.find('-p5p6p-') == 5:
# A UUID-implied API that contradicts an explicit --api is reported as an
# error; otherwise the implied API overrides work_api.
# NOTE(review): what follows the error is not visible here.
246 if want_api and arvargs.work_api and want_api != arvargs.work_api:
247 logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
248 arvargs.update_workflow, want_api, arvargs.work_api))
250 arvargs.work_api = want_api
# Creating or updating a workflow without a job order: supply an empty job
# order object instead of leaving it None.
252 if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
253 job_order_object = ({}, "")
# Copy every cwltool default argument that the Arvados parser did not define
# onto the namespace; attributes that already exist are left untouched.
257 for key, val in cwltool.argparser.get_default_args().items():
258 if not hasattr(arvargs, key):
259 setattr(arvargs, key, val)
# Client/executor setup.  When no API client was supplied, a
# ThreadSafeApiCache is created using --http-timeout, its `.keep` replaces
# keep_client, and the current-user request is issued right away.
# NOTE(review): the `try:` matching the `except` below and the body of that
# `except` are not visible in this view.
262 if api_client is None:
263 api_client = arvados.safeapi.ThreadSafeApiCache(
264 api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
265 keep_params={"num_retries": 4})
266 keep_client = api_client.keep
267 # Make an API object now so errors are reported early.
268 api_client.users().current().execute()
269 if keep_client is None:
270 keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
271 executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
272 except Exception as e:
# Log-level adjustments for the runner logger and related loggers.
# NOTE(review): the conditions guarding these three groups are not visible;
# presumably --debug, --quiet and --metrics respectively — confirm.
277 logger.setLevel(logging.DEBUG)
278 logging.getLogger('arvados').setLevel(logging.DEBUG)
281 logger.setLevel(logging.WARN)
282 logging.getLogger('arvados').setLevel(logging.WARN)
283 logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
286 metrics.setLevel(logging.DEBUG)
287 logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)
# Choose the log format: with a timestamp when log_timestamps is set.
# NOTE(review): the `else:` introducing the timestamp-free formatter is not
# visible in this view.
289 if arvargs.log_timestamps:
290 arvados.log_handler.setFormatter(logging.Formatter(
291 '%(asctime)s %(name)s %(levelname)s: %(message)s',
292 '%Y-%m-%d %H:%M:%S'))
294 arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
# Delegate to cwltool's main with the Arvados executor, version function,
# log handler, schema callback and the executor's loading/runtime contexts.
# NOTE(review): lines between `args=arvargs,` and `executor=` appear to be
# missing from this view.
296 return cwltool.main.main(args=arvargs,
299 executor=executor.arv_executor,
300 versionfunc=versionstring,
301 job_order_object=job_order_object,
302 logger_handler=arvados.log_handler,
303 custom_schema_callback=add_arv_hints,
304 loadingContext=executor.loadingContext,
305 runtimeContext=executor.runtimeContext)