14198: Bringing unit tests up to date WIP
[arvados.git] / sdk / cwl / arvados_cwl / __init__.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: Apache-2.0
5
6 # Implement cwl-runner interface for submitting and running work on Arvados, using
7 # either the Crunch jobs API or Crunch containers API.
8
9 import argparse
10 import logging
11 import os
12 import sys
13 import re
14 import pkg_resources  # part of setuptools
15
16 from schema_salad.sourceline import SourceLine
17 import schema_salad.validate as validate
18 import cwltool.main
19 import cwltool.workflow
20 import cwltool.process
21 import cwltool.argparser
22 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
23 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
24
25 import arvados
26 import arvados.config
27 from arvados.keep import KeepClient
28 from arvados.errors import ApiError
29 import arvados.commands._util as arv_cmd
30 from arvados.api import OrderedJsonModel
31
32 from .perf import Perf
33 from ._version import __version__
34 from .executor import ArvCwlExecutor
35
36 # These aren't used directly in this file but
37 # other code expects to import them from here
38 from .arvcontainer import ArvadosContainer
39 from .arvtool import ArvadosCommandTool
40 from .fsaccess import CollectionFsAccess, CollectionCache
41 from .util import get_current_container
42 from .executor import RuntimeStatusLoggingHandler, DEFAULT_PRIORITY
43
# Module-wide loggers.  `logger` carries the runner's own messages and is
# set to INFO; `metrics` is a separately named logger under the same
# 'arvados.cwl-runner' prefix.
logger = logging.getLogger('arvados.cwl-runner')
logger.setLevel(logging.INFO)
metrics = logging.getLogger('arvados.cwl-runner.metrics')

# Install a timestamped line format on the Arvados SDK log handler as soon
# as this module is imported.
arvados.log_handler.setFormatter(
    logging.Formatter(
        fmt='%(asctime)s %(name)s %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'))
51
def versionstring():
    """Return a one-line version summary for provenance and debugging.

    The result names the invoking program (sys.argv[0]) together with the
    installed arvados-cwl-runner version, followed by the installed
    versions of arvados-python-client and cwltool.

    Whatever pkg_resources.require() raises for a distribution it cannot
    resolve propagates to the caller.
    """
    package_names = ("arvados-cwl-runner", "arvados-python-client", "cwltool")

    # require() hands back a list of distributions per requirement; the
    # version is read from the first element of each list.
    resolved = [pkg_resources.require(name) for name in package_names]
    runner_version, client_version, cwltool_version = [
        dists[0].version for dists in resolved]

    return "%s %s, %s %s, %s %s" % (
        sys.argv[0], runner_version,
        "arvados-python-client", client_version,
        "cwltool", cwltool_version)
62
63
def arg_parser():  # type: () -> argparse.ArgumentParser
    """Build and return the command line parser for the Arvados CWL runner.

    Options are registered in a fixed order (which is also the order they
    appear in --help), followed by the two positionals: the required
    `workflow` and `job_order`, which collects everything that remains.

    Note that versionstring() is evaluated while the parser is being built
    (it supplies the text for --version), not when --version is used.
    """
    cli = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')

    def add_switch_pair(dest, default, on_flag, on_help, off_flag, off_help):
        """Add an on/off flag pair writing to *dest* in a fresh exclusive group; return the group."""
        group = cli.add_mutually_exclusive_group()
        group.add_argument(on_flag, action="store_true", default=default,
                           dest=dest, help=on_help)
        group.add_argument(off_flag, action="store_false", default=default,
                           dest=dest, help=off_help)
        return group

    # --- Paths and expression evaluation -------------------------------
    cli.add_argument(
        "--basedir", type=str,
        help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
    cli.add_argument(
        "--outdir", type=str, default=os.path.abspath('.'),
        help="Output directory, default current directory")
    cli.add_argument(
        "--eval-timeout", type=float, default=20,
        help="Time to wait for a Javascript expression to evaluate before giving an error, default 20s.")

    # --- Alternative actions (at most one) -----------------------------
    action_group = cli.add_mutually_exclusive_group()
    action_group.add_argument(
        "--print-dot", action="store_true",
        help="Print workflow visualization in graphviz format and exit")
    action_group.add_argument(
        "--version", action="version", version=versionstring(),
        help="Print version and exit")
    action_group.add_argument(
        "--validate", action="store_true",
        help="Validate CWL document only.")

    # --- Log verbosity (at most one) -----------------------------------
    verbosity_group = cli.add_mutually_exclusive_group()
    verbosity_group.add_argument("--verbose", action="store_true", help="Default logging")
    verbosity_group.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
    verbosity_group.add_argument("--debug", action="store_true", help="Print even more logging")

    cli.add_argument("--metrics", action="store_true", help="Print timing metrics")

    cli.add_argument("--tool-help", action="store_true", help="Print command line help for tool")

    # --- Reuse ---------------------------------------------------------
    add_switch_pair(
        "enable_reuse", True,
        "--enable-reuse", "Enable job or container reuse (default)",
        "--disable-reuse", "Disable job or container reuse")

    # --- Ownership and naming of outputs -------------------------------
    cli.add_argument(
        "--project-uuid", type=str, metavar="UUID",
        help="Project that will own the workflow jobs, if not provided, will go to home project.")
    cli.add_argument(
        "--output-name", type=str, default=None,
        help="Name to use for collection that stores the final output.")
    cli.add_argument(
        "--output-tags", type=str, default=None,
        help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.")
    cli.add_argument(
        "--ignore-docker-for-reuse", action="store_true", default=False,
        help="Ignore Docker image version when deciding whether to reuse past jobs.")

    # --- Run mode: submit / local / register a workflow (at most one) --
    mode_group = add_switch_pair(
        "submit", True,
        "--submit", "Submit workflow to run on Arvados.",
        "--local", "Run workflow on local host (submits jobs to Arvados).")
    mode_group.add_argument(
        "--create-template", action="store_true", dest="create_workflow",
        help="(Deprecated) synonym for --create-workflow.")
    mode_group.add_argument(
        "--create-workflow", action="store_true",
        help="Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.")
    mode_group.add_argument(
        "--update-workflow", type=str, metavar="UUID",
        help="Update an existing Arvados workflow or pipeline template with the given UUID.")

    # --- Waiting and log line format -----------------------------------
    add_switch_pair(
        "wait", True,
        "--wait", "After submitting workflow runner job, wait for completion.",
        "--no-wait", "Submit workflow runner job and exit.")

    add_switch_pair(
        "log_timestamps", True,
        "--log-timestamps", "Prefix logging lines with timestamp",
        "--no-log-timestamps", "No timestamp on logging lines")

    # --- Submission details --------------------------------------------
    cli.add_argument(
        "--api", type=str, default=None, dest="work_api",
        choices=("jobs", "containers"),
        help="Select work submission API.  Default is 'jobs' if that API is available, otherwise 'containers'.")

    cli.add_argument(
        "--compute-checksum", action="store_true", default=False,
        dest="compute_checksum",
        help="Compute checksum of contents while collecting outputs")

    cli.add_argument(
        "--submit-runner-ram", type=int, default=None,
        help="RAM (in MiB) required for the workflow runner job (default 1024)")

    cli.add_argument(
        "--submit-runner-image", type=str, default=None,
        help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__)

    target_group = cli.add_mutually_exclusive_group()
    target_group.add_argument(
        "--submit-request-uuid", type=str, default=None,
        help="Update and commit to supplied container request instead of creating a new one (containers API only).")
    target_group.add_argument(
        "--submit-runner-cluster", type=str, default=None,
        help="Submit toplevel runner to a remote cluster (containers API only)")

    cli.add_argument(
        "--name", type=str, default=None,
        help="Name to use for workflow execution instance.")

    cli.add_argument(
        "--on-error", type=str, default="continue",
        choices=("stop", "continue"),
        help="Desired workflow behavior when a step fails.  One of 'stop' or 'continue'. "
             "Default is 'continue'.")

    cli.add_argument(
        "--enable-dev", action="store_true", default=False,
        help="Enable loading and running development versions "
             "of CWL spec.")
    cli.add_argument(
        '--storage-classes', type=str, default="default",
        help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")

    cli.add_argument(
        "--intermediate-output-ttl", type=int, metavar="N", default=0,
        help="If N > 0, intermediate output collections will be trashed N seconds after creation.  Default is 0 (don't trash).")

    cli.add_argument(
        "--priority", type=int, default=DEFAULT_PRIORITY,
        help="Workflow priority (range 1..1000, higher has precedence over lower, containers api only)")

    # --- Options hidden from --help ------------------------------------
    cli.add_argument(
        "--disable-validate", action="store_false", default=True,
        dest="do_validate", help=argparse.SUPPRESS)

    cli.add_argument(
        "--disable-js-validation", action="store_true", default=False,
        help=argparse.SUPPRESS)

    # --- Tuning --------------------------------------------------------
    cli.add_argument(
        "--thread-count", type=int, default=4,
        help="Number of threads to use for job submit and output collection.")

    cli.add_argument(
        "--http-timeout", type=int, default=5 * 60, dest="http_timeout",
        help="API request timeout in seconds. Default is 300 seconds (5 minutes).")

    add_switch_pair(
        "trash_intermediate", False,
        "--trash-intermediate", "Immediately trash intermediate outputs on workflow success.",
        "--no-trash-intermediate", "Do not trash intermediate outputs (default).")

    # --- Positionals ---------------------------------------------------
    cli.add_argument("workflow", type=str, default=None, help="The workflow to execute")
    cli.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")

    return cli
202
def add_arv_hints():
    """Register the Arvados CWL extensions with cwltool.

    Three things happen, all of them mutating cwltool module state:

    * cwltool's ACCEPTLIST_EN_RELAXED_RE and ACCEPTLIST_RE patterns are
      both replaced with a regex that matches any string.
    * The bundled 'arv-cwl-schema.yml' resource is loaded and registered
      as a custom schema for CWL "v1.0" under http://arvados.org/cwl.
    * The Arvados (and cwltool LoadListing) requirement URIs are added to
      cwltool.process.supportedProcessRequirements.

    The function is safe to call repeatedly: main() calls it directly and
    also passes it to cwltool as custom_schema_callback, so requirement
    URIs already present in the list are not appended a second time.
    """
    cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")
    cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE

    # Read the schema inside try/finally so the resource stream is closed
    # even if reading fails, and before use_custom_schema() gets a chance
    # to raise.
    res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema.yml')
    try:
        schema_text = res.read()
    finally:
        res.close()
    use_custom_schema("v1.0", "http://arvados.org/cwl", schema_text)

    arv_requirements = [
        "http://arvados.org/cwl#RunInSingleContainer",
        "http://arvados.org/cwl#OutputDirType",
        "http://arvados.org/cwl#RuntimeConstraints",
        "http://arvados.org/cwl#PartitionRequirement",
        "http://arvados.org/cwl#APIRequirement",
        "http://commonwl.org/cwltool#LoadListingRequirement",
        "http://arvados.org/cwl#IntermediateOutput",
        "http://arvados.org/cwl#ReuseRequirement",
        "http://arvados.org/cwl#ClusterTarget"
    ]
    # Only add what is missing, so repeated calls do not grow the shared
    # list with duplicates.
    supported = cwltool.process.supportedProcessRequirements
    supported.extend([req for req in arv_requirements if req not in supported])
220
def exit_signal_handler(sigcode, frame):
    """Log the signal number that was caught, then exit the process.

    sigcode -- number of the signal being handled
    frame   -- accepted for the handler signature; not used

    Raises SystemExit with the negated signal number as exit status.
    """
    message = "Caught signal {}, exiting.".format(sigcode)
    logger.error(message)
    sys.exit(-sigcode)
224
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    """Entry point: parse the command line, set up Arvados, run cwltool.

    args    -- list of command line arguments (without the program name)
    stdout, stderr -- streams handed through to cwltool.main.main()
    api_client  -- Arvados API client to use; one is created (and probed
                   with a users().current() call) when omitted
    keep_client -- Keep client to use; created when omitted
    install_sig_handlers -- when true, install the arvados command-line
                   signal handlers before doing any work

    Returns 1 if more than one storage class is requested, if
    --update-workflow and --api disagree, or if creating the API client or
    executor raises; otherwise returns whatever cwltool.main.main() returns.
    """
    arvargs = arg_parser().parse_args(args)
    job_order_object = None

    # A comma anywhere in the value means more than one storage class.
    if "," in arvargs.storage_classes.strip():
        logger.error("Multiple storage classes are not supported currently.")
        return 1

    # Settings cwltool expects to find on the args object.
    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    workflow_uuid = arvargs.update_workflow
    if workflow_uuid:
        # The infix at characters 5..11 of the UUID selects the API.
        api_by_infix = {'-7fd4e-': 'containers', '-p5p6p-': 'jobs'}
        want_api = api_by_infix.get(workflow_uuid[5:12])
        requested_api = arvargs.work_api
        if want_api and requested_api and requested_api != want_api:
            logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
                workflow_uuid, want_api, requested_api))
            return 1
        # NOTE(review): for an unrecognized UUID want_api is None, which
        # discards an explicit --api choice here -- confirm this is intended.
        arvargs.work_api = want_api

    # Registering or updating a workflow does not need an input object.
    if not arvargs.job_order and (arvargs.create_workflow or arvargs.update_workflow):
        job_order_object = ({}, "")

    add_arv_hints()

    # Fill in every cwltool default that our own parser did not define.
    cwltool_defaults = cwltool.argparser.get_default_args()
    for key in cwltool_defaults:
        if hasattr(arvargs, key):
            continue
        setattr(arvargs, key, cwltool_defaults[key])

    try:
        if api_client is None:
            api_params = {"model": OrderedJsonModel(), "timeout": arvargs.http_timeout}
            keep_params = {"num_retries": 4}
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params=api_params, keep_params=keep_params)
            # NOTE(review): this replaces any keep_client the caller passed
            # without an api_client.
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception as err:
        logger.error(err)
        return 1

    if arvargs.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('arvados').setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        for quiet_name in ('arvados', 'arvados.arv-run'):
            logging.getLogger(quiet_name).setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    # Choose the log line layout, with or without a leading timestamp.
    if arvargs.log_timestamps:
        line_format = logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S')
    else:
        line_format = logging.Formatter('%(name)s %(levelname)s: %(message)s')
    arvados.log_handler.setFormatter(line_format)

    return cwltool.main.main(
        args=arvargs,
        stdout=stdout,
        stderr=stderr,
        executor=executor.arv_executor,
        versionfunc=versionstring,
        job_order_object=job_order_object,
        logger_handler=arvados.log_handler,
        custom_schema_callback=add_arv_hints,
        loadingContext=executor.loadingContext,
        runtimeContext=executor.runtimeContext)