Merge branch 'origin-8442-cwl-crunch2' closes #8442
[arvados.git] / crunch_scripts / cwl-runner
1 #!/usr/bin/env python
2
3 # Crunch script integration for running arvados-cwl-runner (importing
4 # arvados_cwl module) inside a crunch job.
5 #
6 # This gets the job record, transforms the script parameters into a valid CWL
7 # input object, then executes the CWL runner to run the underlying workflow or
8 # tool.  When the workflow completes, record the output object in an output
9 # collection for this runner job.
10
11 import arvados
12 import arvados_cwl
13 import arvados.collection
14 import arvados.util
15 from cwltool.process import shortname
16 import cwltool.main
17 import logging
18 import os
19 import json
20 import argparse
21 from arvados.api import OrderedJsonModel
22 from cwltool.process import adjustFiles
23 from cwltool.load_tool import load_tool
24
# Log the cwltool version string before doing any work.
# NOTE(review): this goes through the root logger at INFO level; whether it
# is actually emitted depends on logging configuration done elsewhere --
# confirm.
logging.info(cwltool.main.versionstring())

api = arvados.api("v1")


def report_task_result(output, success):
    # Update the current job task record with the given output and success
    # flag.  Progress is reported as 1.0 for both outcomes.
    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                           body={
                               'output': output,
                               'success': success,
                               'progress': 1.0
                           }).execute()


try:
    # The job's script parameters double as the CWL input object; they are
    # modified in place below.
    job_order = arvados.current_job()['script_parameters']

    def mount_uri(value):
        # Values matching the Keep locator pattern become file:// URIs
        # rooted at $TASK_KEEPMOUNT; anything else is returned unchanged.
        if not arvados.util.keep_locator_pattern.match(value):
            return value
        return "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], value)

    job_order["cwl:tool"] = mount_uri(job_order["cwl:tool"])

    # Wrap top-level string parameters that match the Keep locator pattern
    # as CWL File objects pointing at the mounted path.
    for name, value in job_order.items():
        is_locator = (isinstance(value, basestring) and
                      arvados.util.keep_locator_pattern.match(value))
        if is_locator:
            job_order[name] = {
                "class": "File",
                "path": mount_uri(value)
            }

    # Hand the same mapping to cwltool's adjustFiles (presumably so File
    # paths nested deeper in the input object are rewritten too -- confirm
    # against cwltool).
    adjustFiles(job_order, mount_uri)

    runner = arvados_cwl.ArvCwlRunner(
        api_client=arvados.api('v1', model=OrderedJsonModel()),
        work_api="jobs")

    tool = load_tool(job_order, runner.arvMakeTool)

    # Keyword options passed to the executor, listed in the order they
    # are evaluated.
    executor_options = {
        "project_uuid": arvados.current_job()["owner_uuid"],
        "enable_reuse": True,
        "submit": False,
        "debug": True,
        "quiet": False,
        "ignore_docker_for_reuse": False,
        "basedir": os.getcwd(),
        "cwl_runner_job": {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        },
    }
    output_obj = runner.arvExecutor(tool, job_order, **executor_options)

    # Maps a source collection identifier to the set of paths referenced
    # inside that collection by the output object.
    files_by_collection = {}

    def capture(path):
        # The first "/"-separated component names the source collection;
        # its first five characters are dropped (presumably a "keep:"
        # prefix -- TODO confirm).  The path itself is returned unchanged.
        head, _, remainder = path.partition("/")
        files_by_collection.setdefault(head[5:], set()).add(remainder)
        return path

    adjustFiles(output_obj, capture)

    final = arvados.collection.Collection()

    # Only the collection identifiers are consulted here: every item
    # yielded by iterating each source collection is copied into the final
    # collection under the same name.
    # NOTE(review): the captured per-collection path sets are not used to
    # filter what gets copied -- confirm this is intended.
    for locator in files_by_collection:
        with arvados.collection.Collection(locator) as source:
            for entry in source:
                final.copy(entry, entry, source, True)

    def make_relative(path):
        # Drop the leading collection component, keeping everything after
        # the first "/".
        return path.partition("/")[2]

    adjustFiles(output_obj, make_relative)

    # Store the output object itself alongside the copied files.
    with final.open("cwl.output.json", "w") as out:
        json.dump(output_obj, out, indent=4)

    report_task_result(final.save_new(create_collection_record=False), True)
except Exception:
    # Any failure is logged with its traceback and recorded on the task as
    # unsuccessful with no output.
    logging.exception("Unhandled exception")
    report_task_result(None, False)