Merge branch 'patch-1' of https://github.com/mr-c/arvados into mr-c-patch-1
[arvados.git] / sdk / cwl / arvados_cwl / __init__.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: Apache-2.0
5
6 # Implement cwl-runner interface for submitting and running work on Arvados, using
7 # the Crunch containers API.
8
9 from future.utils import viewitems
10 from builtins import str
11
12 import argparse
13 import logging
14 import os
15 import sys
16 import re
17 import pkg_resources  # part of setuptools
18
19 from schema_salad.sourceline import SourceLine
20 import schema_salad.validate as validate
21 import cwltool.main
22 import cwltool.workflow
23 import cwltool.process
24 import cwltool.argparser
25 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
26 from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
27
28 import arvados
29 import arvados.config
30 from arvados.keep import KeepClient
31 from arvados.errors import ApiError
32 import arvados.commands._util as arv_cmd
33 from arvados.api import OrderedJsonModel
34
35 from .perf import Perf
36 from ._version import __version__
37 from .executor import ArvCwlExecutor
38
39 # These aren't used directly in this file but
40 # other code expects to import them from here
41 from .arvcontainer import ArvadosContainer
42 from .arvtool import ArvadosCommandTool
43 from .fsaccess import CollectionFsAccess, CollectionCache, CollectionFetcher
44 from .util import get_current_container
45 from .executor import RuntimeStatusLoggingHandler, DEFAULT_PRIORITY
46 from .arvworkflow import ArvadosWorkflow
47
# Main logger for the runner; it reports at INFO level unless main()
# later raises or lowers the level from the command line flags.
logger = logging.getLogger('arvados.cwl-runner')
logger.setLevel(logging.INFO)

# Separate logger for timing metrics (named as a child of the main logger).
metrics = logging.getLogger('arvados.cwl-runner.metrics')

# Start out with timestamped log lines on the Arvados SDK log handler.
arvados.log_handler.setFormatter(
    logging.Formatter(fmt='%(asctime)s %(name)s %(levelname)s: %(message)s',
                      datefmt='%Y-%m-%d %H:%M:%S'))
55
def versionstring():
    """Return a one-line summary of key package versions.

    The result names the invoking program (sys.argv[0]) together with the
    installed arvados-cwl-runner version, followed by the installed
    arvados-python-client and cwltool versions.  Intended for provenance
    and debugging output.
    """

    def installed_version(dist_name):
        # require() returns a list of distributions; the version is taken
        # from its first entry.
        return pkg_resources.require(dist_name)[0].version

    runner_version = installed_version("arvados-cwl-runner")
    client_version = installed_version("arvados-python-client")
    cwltool_version = installed_version("cwltool")

    return "%s %s, %s %s, %s %s" % (
        sys.argv[0], runner_version,
        "arvados-python-client", client_version,
        "cwltool", cwltool_version)
66
67
def arg_parser():  # type: () -> argparse.ArgumentParser
    """Build the command line parser for the Arvados CWL runner.

    Options are registered in a fixed order (which is also the order they
    appear in --help).  Several options come as on/off pairs that write to
    the same destination and are mutually exclusive with each other.

    Returns the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
    add = parser.add_argument

    def add_switch_pair(dest, default, on_flag, on_help, off_flag, off_help):
        # Register "--x" (stores True) and "--no-x"-style (stores False)
        # flags sharing one dest, inside a new mutually exclusive group.
        # The group is returned so callers can add further members to it.
        pair = parser.add_mutually_exclusive_group()
        pair.add_argument(on_flag, action="store_true",
                          default=default, dest=dest, help=on_help)
        pair.add_argument(off_flag, action="store_false",
                          default=default, dest=dest, help=off_help)
        return pair

    add("--basedir",
        help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
    add("--outdir",
        default=os.path.abspath('.'),
        help="Output directory, default current directory")
    add("--eval-timeout",
        type=float,
        default=20,
        help="Time to wait for a Javascript expression to evaluate before giving an error, default 20s.")

    # Actions that inspect or report instead of running the workflow.
    action_group = parser.add_mutually_exclusive_group()
    action_group.add_argument("--print-dot", action="store_true",
                              help="Print workflow visualization in graphviz format and exit")
    action_group.add_argument("--version", action="version",
                              help="Print version and exit",
                              version=versionstring())
    action_group.add_argument("--validate", action="store_true",
                              help="Validate CWL document only.")

    # Logging verbosity.
    verbosity_group = parser.add_mutually_exclusive_group()
    verbosity_group.add_argument("--verbose", action="store_true",
                                 help="Default logging")
    verbosity_group.add_argument("--quiet", action="store_true",
                                 help="Only print warnings and errors.")
    verbosity_group.add_argument("--debug", action="store_true",
                                 help="Print even more logging")

    add("--metrics", action="store_true", help="Print timing metrics")

    add("--tool-help", action="store_true", help="Print command line help for tool")

    add_switch_pair("enable_reuse", True,
                    "--enable-reuse", "Enable container reuse (default)",
                    "--disable-reuse", "Disable container reuse")

    add("--project-uuid",
        metavar="UUID",
        help="Project that will own the workflow containers, if not provided, will go to home project.")
    add("--output-name",
        default=None,
        help="Name to use for collection that stores the final output.")
    add("--output-tags",
        default=None,
        help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.")
    add("--ignore-docker-for-reuse",
        action="store_true",
        default=False,
        help="Ignore Docker image version when deciding whether to reuse past containers.")

    # Run mode: submit/local share a dest, and the workflow registration
    # options are exclusive with both of them.
    mode_group = add_switch_pair(
        "submit", True,
        "--submit", "Submit workflow to run on Arvados.",
        "--local", "Run workflow on local host (submits containers to Arvados).")
    mode_group.add_argument("--create-template", action="store_true",
                            dest="create_workflow",
                            help="(Deprecated) synonym for --create-workflow.")
    mode_group.add_argument("--create-workflow", action="store_true",
                            help="Register an Arvados workflow that can be run from Workbench")
    mode_group.add_argument("--update-workflow", metavar="UUID",
                            help="Update an existing Arvados workflow with the given UUID.")

    add_switch_pair("wait", True,
                    "--wait", "After submitting workflow runner, wait for completion.",
                    "--no-wait", "Submit workflow runner and exit.")

    add_switch_pair("log_timestamps", True,
                    "--log-timestamps", "Prefix logging lines with timestamp",
                    "--no-log-timestamps", "No timestamp on logging lines")

    add("--api",
        dest="work_api",
        default=None,
        choices=("containers",),
        help="Select work submission API.  Only supports 'containers'")

    add("--compute-checksum",
        action="store_true",
        dest="compute_checksum",
        default=False,
        help="Compute checksum of contents while collecting outputs")

    add("--submit-runner-ram",
        type=int,
        default=None,
        help="RAM (in MiB) required for the workflow runner job (default 1024)")

    add("--submit-runner-image",
        default=None,
        help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__)

    add("--always-submit-runner",
        action="store_true",
        default=False,
        help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool")

    # Where the runner request goes: an existing request, or another cluster.
    target_group = parser.add_mutually_exclusive_group()
    target_group.add_argument("--submit-request-uuid",
                              metavar="UUID",
                              default=None,
                              help="Update and commit to supplied container request instead of creating a new one.")
    target_group.add_argument("--submit-runner-cluster",
                              metavar="CLUSTER_ID",
                              default=None,
                              help="Submit workflow runner to a remote cluster")

    add("--collection-cache-size",
        type=int,
        default=None,
        help="Collection cache size (in MiB, default 256).")

    add("--name",
        default=None,
        help="Name to use for workflow execution instance.")

    add("--on-error",
        default="continue",
        choices=("stop", "continue"),
        help="Desired workflow behavior when a step fails.  One of 'stop' (do not submit any more steps) or "
             "'continue' (may submit other steps that are not downstream from the error). Default is 'continue'.")

    add("--enable-dev",
        action="store_true",
        default=False,
        help="Enable loading and running development versions "
             "of the CWL standards.")
    add('--storage-classes',
        default="default",
        help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")

    add("--intermediate-output-ttl",
        type=int,
        metavar="N",
        default=0,
        help="If N > 0, intermediate output collections will be trashed N seconds after creation.  Default is 0 (don't trash).")

    add("--priority",
        type=int,
        default=DEFAULT_PRIORITY,
        help="Workflow priority (range 1..1000, higher has precedence over lower)")

    # The next two options are accepted but hidden from --help.
    add("--disable-validate",
        dest="do_validate",
        action="store_false",
        default=True,
        help=argparse.SUPPRESS)

    add("--disable-js-validation",
        action="store_true",
        default=False,
        help=argparse.SUPPRESS)

    add("--thread-count",
        type=int,
        default=1,
        help="Number of threads to use for job submit and output collection.")

    add("--http-timeout",
        type=int,
        dest="http_timeout",
        default=5*60,
        help="API request timeout in seconds. Default is 300 seconds (5 minutes).")

    add_switch_pair("trash_intermediate", False,
                    "--trash-intermediate", "Immediately trash intermediate outputs on workflow success.",
                    "--no-trash-intermediate", "Do not trash intermediate outputs (default).")

    # Positionals: the workflow document, then everything that follows it.
    add("workflow", default=None, help="The workflow to execute")
    add("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")

    return parser
217
def add_arv_hints():
    """Register the Arvados CWL extensions with cwltool.

    This does three things:

    * replaces cwltool's command line tool accept-list patterns with one
      that matches any string;
    * loads the bundled Arvados extension schemas and registers them for
      CWL v1.0, v1.1.0-dev1 and v1.1 under the http://arvados.org/cwl
      namespace;
    * adds the Arvados requirement/hint URIs to cwltool's list of
      supported process requirements.

    The function is invoked more than once per run (directly from main()
    and again as cwltool's custom schema callback), so the requirement
    list is only extended with entries that are not already present.
    """
    relaxed_re = re.compile(r".*")
    cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = relaxed_re
    cwltool.command_line_tool.ACCEPTLIST_RE = relaxed_re

    # resource_string() returns the resource bytes directly, so there is no
    # stream left open if decoding or schema registration raises.
    customschema10 = pkg_resources.resource_string(
        __name__, 'arv-cwl-schema-v1.0.yml').decode('utf-8')
    customschema11 = pkg_resources.resource_string(
        __name__, 'arv-cwl-schema-v1.1.yml').decode('utf-8')

    use_custom_schema("v1.0", "http://arvados.org/cwl", customschema10)
    use_custom_schema("v1.1.0-dev1", "http://arvados.org/cwl", customschema11)
    use_custom_schema("v1.1", "http://arvados.org/cwl", customschema11)

    supported = cwltool.process.supportedProcessRequirements
    arv_requirements = (
        "http://arvados.org/cwl#RunInSingleContainer",
        "http://arvados.org/cwl#OutputDirType",
        "http://arvados.org/cwl#RuntimeConstraints",
        "http://arvados.org/cwl#PartitionRequirement",
        "http://arvados.org/cwl#APIRequirement",
        "http://commonwl.org/cwltool#LoadListingRequirement",
        "http://arvados.org/cwl#IntermediateOutput",
        "http://arvados.org/cwl#ReuseRequirement",
        "http://arvados.org/cwl#ClusterTarget",
    )
    for requirement in arv_requirements:
        # Skip entries added by an earlier call to avoid duplicates.
        if requirement not in supported:
            supported.append(requirement)
241
def exit_signal_handler(sigcode, frame):
    """Log the signal number that was caught, then exit.

    The (sigcode, frame) signature matches what a handler installed with
    signal.signal() receives; frame is not used.

    Raises SystemExit carrying the negated signal number.
    """
    message = str(u"Caught signal {}, exiting.").format(sigcode)
    logger.error(message)
    # Equivalent to sys.exit(-sigcode), which raises SystemExit itself.
    raise SystemExit(-sigcode)
245
def main(args, stdout, stderr, api_client=None, keep_client=None,
         install_sig_handlers=True):
    """Parse the command line, set up Arvados and hand off to cwltool.

    Arguments:

    * args: list of command line arguments (without the program name).
    * stdout, stderr: streams passed on to cwltool.main.main().  When
      stdout is sys.stdout itself, None is passed instead so cwltool
      applies its own stdout encoding handling.
    * api_client: Arvados API client to use; when None, a
      ThreadSafeApiCache is created and its keep client is used.
    * keep_client: Keep client to use when api_client is supplied; when
      None, a KeepClient is created from api_client.
    * install_sig_handlers: when true, install the Arvados command line
      signal handlers.

    Returns 1 if multiple storage classes are requested, if
    --update-workflow conflicts with --api, or if the executor cannot be
    created; otherwise returns the result of cwltool.main.main().
    """
    arvargs = arg_parser().parse_args(args)
    job_order_object = None

    # A comma means more than one storage class was listed.
    if ',' in arvargs.storage_classes.strip():
        logger.error(str(u"Multiple storage classes are not supported currently."))
        return 1

    # Options cwltool reads from the namespace that are fixed for Arvados.
    arvargs.use_container = True
    arvargs.relax_path_checks = True
    arvargs.print_supported_versions = False

    if install_sig_handlers:
        arv_cmd.install_signal_handlers()

    if arvargs.update_workflow:
        # The infix at offset 5 of the UUID decides which API the existing
        # workflow record implies.
        uses_containers = arvargs.update_workflow.find('-7fd4e-') == 5
        want_api = 'containers' if uses_containers else None
        api_conflict = want_api and arvargs.work_api and want_api != arvargs.work_api
        if api_conflict:
            logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                arvargs.update_workflow, want_api, arvargs.work_api))
            return 1
        arvargs.work_api = want_api

    # Registering or updating a workflow does not need an input object, so
    # supply an empty one when none was given.
    registering = arvargs.create_workflow or arvargs.update_workflow
    if registering and not arvargs.job_order:
        job_order_object = ({}, "")

    add_arv_hints()

    # Fill in every cwltool default that our own parser did not define.
    for key, val in viewitems(cwltool.argparser.get_default_args()):
        if hasattr(arvargs, key):
            continue
        setattr(arvargs, key, val)

    try:
        if api_client is None:
            api_client = arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
                keep_params={"num_retries": 4})
            keep_client = api_client.keep
            # Make an API object now so errors are reported early.
            api_client.users().current().execute()
        if keep_client is None:
            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
    except Exception:
        logger.exception("Error creating the Arvados CWL Executor")
        return 1

    # Note that unless in debug mode, some stack traces related to user
    # workflow errors may be suppressed.
    if arvargs.debug:
        for verbose_logger in (logger, logging.getLogger('arvados')):
            verbose_logger.setLevel(logging.DEBUG)

    if arvargs.quiet:
        logger.setLevel(logging.WARN)
        for quiet_name in ('arvados', 'arvados.arv-run'):
            logging.getLogger(quiet_name).setLevel(logging.WARN)

    if arvargs.metrics:
        metrics.setLevel(logging.DEBUG)
        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)

    # Choose the log line layout, with or without a leading timestamp.
    if arvargs.log_timestamps:
        formatter = logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%Y-%m-%d %H:%M:%S')
    else:
        formatter = logging.Formatter('%(name)s %(levelname)s: %(message)s')
    arvados.log_handler.setFormatter(formatter)

    if stdout is sys.stdout:
        # cwltool.main has code to work around encoding issues with
        # sys.stdout and unix pipes (they default to ASCII encoding,
        # we want utf-8), so when stdout is sys.stdout set it to None
        # to take advantage of that.  Don't override it for all cases
        # since we still want to be able to capture stdout for the
        # unit tests.
        stdout = None

    # An input object is only mandatory when actually running a workflow.
    input_required = not (arvargs.create_workflow or arvargs.update_workflow)

    return cwltool.main.main(
        args=arvargs,
        stdout=stdout,
        stderr=stderr,
        executor=executor.arv_executor,
        versionfunc=versionstring,
        job_order_object=job_order_object,
        logger_handler=arvados.log_handler,
        custom_schema_callback=add_arv_hints,
        loadingContext=executor.loadingContext,
        runtimeContext=executor.runtimeContext,
        input_required=input_required)