X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0e5198142fdba0ce2af6eb2852d45dff46ffb2e2..0561bd0c3c07257fd58ded6c7cfa5feeae97af57:/sdk/cwl/arvados_cwl/crunch_script.py diff --git a/sdk/cwl/arvados_cwl/crunch_script.py b/sdk/cwl/arvados_cwl/crunch_script.py index e5c6d67ac9..7fbbd29d50 100644 --- a/sdk/cwl/arvados_cwl/crunch_script.py +++ b/sdk/cwl/arvados_cwl/crunch_script.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + # Crunch script integration for running arvados-cwl-runner (importing # arvados_cwl module) inside a crunch job. # @@ -19,12 +23,19 @@ import re import functools from arvados.api import OrderedJsonModel -from cwltool.process import shortname, adjustFileObjs, adjustDirObjs, getListing, normalizeFilesDirs +from cwltool.process import shortname +from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, normalizeFilesDirs from cwltool.load_tool import load_tool +from cwltool.errors import WorkflowException + +from .fsaccess import CollectionFetcher, CollectionFsAccess logger = logging.getLogger('arvados.cwl-runner') def run(): + # Timestamps are added by crunch-job, so don't print redundant timestamps. + arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s')) + # Print package versions logger.info(arvados_cwl.versionstring()) @@ -32,8 +43,10 @@ def run(): arvados_cwl.add_arv_hints() + runner = None try: job_order_object = arvados.current_job()['script_parameters'] + toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool")) pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$') @@ -44,9 +57,8 @@ def run(): return v def keeppathObj(v): - v["location"] = keeppath(v["location"]) - - job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"]) + if "location" in v: + v["location"] = keeppath(v["location"]) for k,v in job_order_object.items(): if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v): @@ -58,45 +70,67 @@ def run(): adjustFileObjs(job_order_object, keeppathObj) adjustDirObjs(job_order_object, keeppathObj) normalizeFilesDirs(job_order_object) - adjustDirObjs(job_order_object, functools.partial(getListing, arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api))) output_name = None + output_tags = None + enable_reuse = True + on_error = "continue" if "arv:output_name" in job_order_object: output_name = job_order_object["arv:output_name"] del job_order_object["arv:output_name"] + if "arv:output_tags" in job_order_object: + output_tags = job_order_object["arv:output_tags"] + del job_order_object["arv:output_tags"] + + if "arv:enable_reuse" in job_order_object: + enable_reuse = job_order_object["arv:enable_reuse"] + del job_order_object["arv:enable_reuse"] + + if "arv:on_error" in job_order_object: + on_error = job_order_object["arv:on_error"] + del job_order_object["arv:on_error"] + runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()), - output_name=output_name) + output_name=output_name, output_tags=output_tags) + + make_fs_access = functools.partial(CollectionFsAccess, + collection_cache=runner.collection_cache) - t = load_tool(job_order_object, runner.arv_make_tool) + t = load_tool(toolpath, runner.arv_make_tool, + fetcher_constructor=functools.partial(CollectionFetcher, + api_client=runner.api, + fs_access=make_fs_access(""), + num_retries=runner.num_retries)) args = argparse.Namespace() args.project_uuid = arvados.current_job()["owner_uuid"] - args.enable_reuse = True + args.enable_reuse = enable_reuse + args.on_error = on_error args.submit = False - args.debug = True + args.debug = False args.quiet = False args.ignore_docker_for_reuse = False args.basedir = os.getcwd() + args.name = None args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]} - outputObj = runner.arv_executor(t, job_order_object, **vars(args)) + args.make_fs_access = make_fs_access + args.trash_intermediate = False + args.intermediate_output_ttl = 0 - if runner.final_output_collection: + runner.arv_executor(t, job_order_object, **vars(args)) + except Exception as e: + if isinstance(e, WorkflowException): + logging.info("Workflow error %s", e) + else: + logging.exception("Unhandled exception") + if runner and runner.final_output_collection: outputCollection = runner.final_output_collection.portable_data_hash() else: outputCollection = None - api.job_tasks().update(uuid=arvados.current_task()['uuid'], body={ 'output': outputCollection, - 'success': True, - 'progress':1.0 - }).execute() - except Exception as e: - logging.exception("Unhandled exception") - api.job_tasks().update(uuid=arvados.current_task()['uuid'], - body={ - 'output': None, 'success': False, 'progress':1.0 }).execute()