8653: cwl-runner crunch script rewrites keep file paths into CWL File objects.
[arvados.git] / crunch_scripts / cwl-runner
1 #!/usr/bin/env python
2
3 # Crunch script integration for running arvados-cwl-runner (importing
4 # arvados_cwl module) inside a crunch job.
5 #
6 # This gets the job record, transforms the script parameters into a valid CWL
7 # input object, then executes the CWL runner to run the underlying workflow or
8 # tool.  When the workflow completes, record the output object in an output
9 # collection for this runner job.
10
11 import arvados
12 import arvados_cwl
13 import arvados.collection
14 import arvados.util
15 from cwltool.process import shortname
16 import cwltool.main
17 import logging
18 import os
19 import json
20 import argparse
21 from arvados.api import OrderedJsonModel
22 from cwltool.process import adjustFiles
23
# Print package versions
logging.info(cwltool.main.versionstring())

api = arvados.api("v1")

# Script parameters come from a decoded JSON job record, so on Python 2 string
# values may be `unicode`; check against `basestring` there and fall back to
# `str` on interpreters where `basestring` does not exist.
try:
    string_types = basestring
except NameError:
    string_types = str

try:
    # The job's script parameters double as the CWL input object; they are
    # rewritten in place below.
    job_order_object = arvados.current_job()['script_parameters']

    def keeppath(v):
        """Map a Keep locator to a file:// URL under the task's Keep mount.

        Values that do not match the Keep locator pattern are returned
        unchanged.  This makes the function safe to apply more than once to
        the same path, and safe to apply to paths that never referred to Keep.
        """
        if arvados.util.keep_locator_pattern.match(v):
            return "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], v)
        return v

    job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])

    # Wrap bare Keep locator strings in CWL File objects.  Non-string
    # parameters (numbers, lists, nested objects) are skipped because the
    # locator regex can only be matched against strings.  Iterate over a
    # snapshot because entries are reassigned inside the loop.
    for k, v in list(job_order_object.items()):
        if isinstance(v, string_types) and arvados.util.keep_locator_pattern.match(v):
            job_order_object[k] = {
                "class": "File",
                "path": keeppath(v)
            }

    # Rewrite the paths of File objects in the input object through keeppath.
    # Paths already rewritten above no longer match the locator pattern and
    # are left as they are.
    adjustFiles(job_order_object, keeppath)

    runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))

    t = cwltool.main.load_tool(job_order_object, False, True, runner.arvMakeTool, True)

    args = argparse.Namespace()
    args.project_uuid = arvados.current_job()["owner_uuid"]
    args.enable_reuse = True
    args.submit = False
    args.debug = True
    args.quiet = False
    args.ignore_docker_for_reuse = False
    outputObj = runner.arvExecutor(
        t, job_order_object, "", args,
        cwl_runner_job={
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        })

    # Maps the first path component (minus its 5-character prefix) to the set
    # of file paths referenced beneath it.
    files = {}

    def capture(path):
        """Record an output file path in `files` and return it unchanged.

        The first path component, with its first five characters removed, is
        used as the collection identifier.
        NOTE(review): presumably the removed prefix is "keep:" -- confirm
        against the paths produced by the runner.
        """
        sp = path.split("/")
        col = sp[0][5:]
        files.setdefault(col, set()).add("/".join(sp[1:]))
        return path

    adjustFiles(outputObj, capture)

    final = arvados.collection.Collection()

    # Merge the source collections into the final output collection.
    # NOTE(review): every entry of each source collection is copied, not only
    # the file names recorded by capture() -- confirm this is intended.
    for k in files:
        with arvados.collection.Collection(k) as c:
            for f in c:
                final.copy(f, f, c, True)

    def makeRelative(path):
        """Drop the first path component, leaving the rest of the path."""
        return "/".join(path.split("/")[1:])

    adjustFiles(outputObj, makeRelative)

    # Store the (now relative-path) output object alongside the copied files.
    with final.open("cwl.output.json", "w") as f:
        json.dump(outputObj, f, indent=4)

    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                         body={
                                             'output': final.save_new(create_collection_record=False),
                                             'success': True,
                                             'progress':1.0
                                         }).execute()
except Exception:
    # Top-level boundary: log the traceback and mark the task as failed so
    # the failure is recorded on the task.
    logging.exception("Unhandled exception")
    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                         body={
                                             'output': None,
                                             'success': False,
                                             'progress':1.0
                                         }).execute()