1 from past.builtins import basestring
2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: Apache-2.0
6 # Crunch script integration for running arvados-cwl-runner (importing
7 # arvados_cwl module) inside a crunch job.
9 # This gets the job record, transforms the script parameters into a valid CWL
10 # input object, then executes the CWL runner to run the underlying workflow or
11 # tool. When the workflow completes, record the output object in an output
12 # collection for this runner job.
16 import arvados.collection
26 from arvados.api import OrderedJsonModel
27 from cwltool.process import shortname
28 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, normalizeFilesDirs
29 from cwltool.load_tool import load_tool
30 from cwltool.errors import WorkflowException
31 from arvados_cwl.context import ArvRuntimeContext
33 from .fsaccess import CollectionFetcher, CollectionFsAccess
35 logger = logging.getLogger('arvados.cwl-runner')
38 # Timestamps are added by crunch-job, so don't print redundant timestamps.
39 arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
41 # Print package versions
42 logger.info(arvados_cwl.versionstring())
44 api = arvados.api("v1")
46 arvados_cwl.add_arv_hints()
50 job_order_object = arvados.current_job()['script_parameters']
51 toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))
53 pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')
63 v["location"] = keeppath(v["location"])
65 for k,v in list(job_order_object.items()):
66 if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
67 job_order_object[k] = {
69 "location": "keep:%s" % v
72 adjustFileObjs(job_order_object, keeppathObj)
73 adjustDirObjs(job_order_object, keeppathObj)
74 normalizeFilesDirs(job_order_object)
# Lift the optional "arv:*" runner settings out of the script parameters.
# Each key that is present is read into its variable and removed from
# job_order_object in the same step; a key that is absent leaves the
# corresponding variable untouched.
if "arv:output_name" in job_order_object:
    output_name = job_order_object.pop("arv:output_name")

if "arv:output_tags" in job_order_object:
    output_tags = job_order_object.pop("arv:output_tags")

if "arv:enable_reuse" in job_order_object:
    enable_reuse = job_order_object.pop("arv:enable_reuse")

if "arv:on_error" in job_order_object:
    on_error = job_order_object.pop("arv:on_error")

if "arv:debug" in job_order_object:
    debug = job_order_object.pop("arv:debug")
# Collect the runner options in an argparse namespace.  Keyword order
# mirrors the original attribute-assignment order, so vars(arvargs)
# yields the same mapping.
arvargs = argparse.Namespace(
    work_api="jobs",
    output_name=output_name,
    output_tags=output_tags,
    thread_count=1,
    collection_cache_size=None,
)
109 runner = arvados_cwl.ArvCwlExecutor(api_client=arvados.safeapi.ThreadSafeApiCache(
110 api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
113 make_fs_access = functools.partial(CollectionFsAccess,
114 collection_cache=runner.collection_cache)
116 t = load_tool(toolpath, runner.loadingContext)
119 logger.setLevel(logging.DEBUG)
120 logging.getLogger('arvados').setLevel(logging.DEBUG)
121 logging.getLogger("cwltool").setLevel(logging.DEBUG)
# Seed the runtime context from the namespace built above, then fill in
# the values that depend on the current job and on the options read from
# the script parameters.
args = ArvRuntimeContext(vars(arvargs))
args.project_uuid = arvados.current_job()["owner_uuid"]
args.enable_reuse = enable_reuse
args.on_error = on_error
args.ignore_docker_for_reuse = False
args.basedir = os.getcwd()
args.cwl_runner_job = {
    "uuid": arvados.current_job()["uuid"],
    "state": arvados.current_job()["state"],
}
args.make_fs_access = make_fs_access

# Fixed settings, applied in the same order as before.
fixed_settings = (
    ("trash_intermediate", False),
    ("intermediate_output_ttl", 0),
    ("priority", arvados_cwl.DEFAULT_PRIORITY),
    ("do_validate", True),
    ("disable_js_validation", False),
    ("tmp_outdir_prefix", "tmp"),
)
for setting_name, setting_value in fixed_settings:
    setattr(args, setting_name, setting_value)
142 runner.arv_executor(t, job_order_object, args, logger=logger)
143 except Exception as e:
144 if isinstance(e, WorkflowException):
145 logging.info("Workflow error %s", e)
147 logging.exception("Unhandled exception")
148 if runner and runner.final_output_collection:
149 outputCollection = runner.final_output_collection.portable_data_hash()
151 outputCollection = None
152 api.job_tasks().update(uuid=arvados.current_task()['uuid'],
154 'output': outputCollection,