X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2924f222d9efdb1b8776225d2d51bc8771d7b077..873fcf181c037cc1e42419bfeaf5bb70c9d9e239:/sdk/cwl/arvados_cwl/__init__.py diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index 7818ac84f4..30d91b4094 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -6,15 +6,13 @@ # Implement cwl-runner interface for submitting and running work on Arvados, using # the Crunch containers API. -from future.utils import viewitems -from builtins import str - import argparse +import importlib.metadata +import importlib.resources import logging import os import sys import re -import pkg_resources # part of setuptools from schema_salad.sourceline import SourceLine import schema_salad.validate as validate @@ -28,10 +26,10 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing import arvados import arvados.config +import arvados.logging from arvados.keep import KeepClient from arvados.errors import ApiError import arvados.commands._util as arv_cmd -from arvados.api import OrderedJsonModel from .perf import Perf from ._version import __version__ @@ -57,18 +55,18 @@ arvados.log_handler.setFormatter(logging.Formatter( def versionstring(): """Print version string of key packages for provenance and debugging.""" - - arvcwlpkg = pkg_resources.require("arvados-cwl-runner") - arvpkg = pkg_resources.require("arvados-python-client") - cwlpkg = pkg_resources.require("cwltool") - - return "%s %s, %s %s, %s %s" % (sys.argv[0], arvcwlpkg[0].version, - "arvados-python-client", arvpkg[0].version, - "cwltool", cwlpkg[0].version) - + return "{} {}, arvados-python-client {}, cwltool {}".format( + sys.argv[0], + importlib.metadata.version('arvados-cwl-runner'), + importlib.metadata.version('arvados-python-client'), + importlib.metadata.version('cwltool'), + ) def arg_parser(): # type: () -> argparse.ArgumentParser - parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language') + parser = argparse.ArgumentParser( + description='Arvados executor for Common Workflow Language', + parents=[arv_cmd.retry_opt], + ) parser.add_argument("--basedir", help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).") @@ -120,6 +118,8 @@ def arg_parser(): # type: () -> argparse.ArgumentParser exgroup.add_argument("--create-workflow", action="store_true", help="Register an Arvados workflow that can be run from Workbench") exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow with the given UUID.") + exgroup.add_argument("--print-keep-deps", action="store_true", help="To assist copying, print a list of Keep collections that this workflow depends on.") + exgroup = parser.add_mutually_exclusive_group() exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner, wait for completion.", default=True, dest="wait") @@ -212,6 +212,10 @@ def arg_parser(): # type: () -> argparse.ArgumentParser action="store_true", default=False, help=argparse.SUPPRESS) + parser.add_argument("--fast-parser", dest="fast_parser", + action="store_true", default=False, + help=argparse.SUPPRESS) + parser.add_argument("--thread-count", type=int, default=0, help="Number of threads to use for job submit and output collection.") @@ -251,6 +255,10 @@ def arg_parser(): # type: () -> argparse.ArgumentParser default=False, dest="trash_intermediate", help="Do not trash intermediate outputs (default).") + exgroup = parser.add_mutually_exclusive_group() + exgroup.add_argument("--enable-usage-report", dest="enable_usage_report", default=None, action="store_true", help="Create usage_report.html with a summary of each step's resource usage.") + exgroup.add_argument("--disable-usage-report", dest="enable_usage_report", default=None, action="store_false", help="Disable usage report.") + parser.add_argument("workflow", default=None, help="The workflow to execute") parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.") @@ -261,10 +269,8 @@ def add_arv_hints(): cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE supported_versions = ["v1.0", "v1.1", "v1.2"] for s in supported_versions: - res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema-%s.yml' % s) - customschema = res.read().decode('utf-8') + customschema = importlib.resources.read_text(__name__, f'arv-cwl-schema-{s}.yml', 'utf-8') use_custom_schema(s, "http://arvados.org/cwl", customschema) - res.close() cwltool.process.supportedProcessRequirements.extend([ "http://arvados.org/cwl#RunInSingleContainer", "http://arvados.org/cwl#OutputDirType", @@ -281,6 +287,7 @@ def add_arv_hints(): "http://arvados.org/cwl#UsePreemptible", "http://arvados.org/cwl#OutputCollectionProperties", "http://arvados.org/cwl#KeepCacheTypeRequirement", + "http://arvados.org/cwl#OutOfMemoryRetry", ]) def exit_signal_handler(sigcode, frame): @@ -316,20 +323,27 @@ def main(args=sys.argv[1:], return 1 arvargs.work_api = want_api - if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order: + workflow_op = arvargs.create_workflow or arvargs.update_workflow or arvargs.print_keep_deps + + if workflow_op and not arvargs.job_order: job_order_object = ({}, "") add_arv_hints() - for key, val in viewitems(cwltool.argparser.get_default_args()): + for key, val in cwltool.argparser.get_default_args().items(): if not hasattr(arvargs, key): setattr(arvargs, key, val) try: if api_client is None: api_client = arvados.safeapi.ThreadSafeApiCache( - api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout}, - keep_params={"num_retries": 4}, + api_params={ + 'num_retries': arvargs.retries, + 'timeout': arvargs.http_timeout, + }, + keep_params={ + 'num_retries': arvargs.retries, + }, version='v1', ) keep_client = api_client.keep @@ -337,8 +351,18 @@ def main(args=sys.argv[1:], api_client.users().current().execute() if keep_client is None: block_cache = arvados.keep.KeepBlockCache(disk_cache=True) - keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4, block_cache=block_cache) - executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4, stdout=stdout) + keep_client = arvados.keep.KeepClient( + api_client=api_client, + block_cache=block_cache, + num_retries=arvargs.retries, + ) + executor = ArvCwlExecutor( + api_client, + arvargs, + keep_client=keep_client, + num_retries=arvargs.retries, + stdout=stdout, + ) except WorkflowException as e: logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False)) return 1 @@ -348,9 +372,25 @@ def main(args=sys.argv[1:], # Note that unless in debug mode, some stack traces related to user # workflow errors may be suppressed. + + # Set the logging on most modules INFO (instead of default which is WARNING) + logger.setLevel(logging.INFO) + logging.getLogger('arvados').setLevel(logging.INFO) + logging.getLogger('arvados.keep').setLevel(logging.WARNING) + # API retries are filtered to the INFO level and can be noisy, but as long as + # they succeed we don't need to see warnings about it. + googleapiclient_http_logger = logging.getLogger('googleapiclient.http') + googleapiclient_http_logger.addFilter(arvados.logging.GoogleHTTPClientFilter()) + googleapiclient_http_logger.setLevel(logging.WARNING) + if arvargs.debug: logger.setLevel(logging.DEBUG) logging.getLogger('arvados').setLevel(logging.DEBUG) + # In debug mode show logs about retries, but we arn't + # debugging the google client so we don't need to see + # everything. + googleapiclient_http_logger.setLevel(logging.NOTSET) + logging.getLogger('googleapiclient').setLevel(logging.INFO) if arvargs.quiet: logger.setLevel(logging.WARN) @@ -377,9 +417,12 @@ def main(args=sys.argv[1:], # unit tests. stdout = None - if arvargs.submit and (arvargs.workflow.startswith("arvwf:") or workflow_uuid_pattern.match(arvargs.workflow)): + executor.loadingContext.default_docker_image = arvargs.submit_runner_image or "arvados/jobs:"+__version__ + + if arvargs.workflow.startswith("arvwf:") or workflow_uuid_pattern.match(arvargs.workflow) or arvargs.workflow.startswith("keep:"): executor.loadingContext.do_validate = False - executor.fast_submit = True + if arvargs.submit and not workflow_op: + executor.fast_submit = True return cwltool.main.main(args=arvargs, stdout=stdout, @@ -391,4 +434,4 @@ def main(args=sys.argv[1:], custom_schema_callback=add_arv_hints, loadingContext=executor.loadingContext, runtimeContext=executor.toplevel_runtimeContext, - input_required=not (arvargs.create_workflow or arvargs.update_workflow)) + input_required=not workflow_op)