Merge branch 'master' into 8724-keep-block-check-script
[arvados.git] / crunch_scripts / cwl-runner
1 #!/usr/bin/env python
2
3 # Crunch script integration for running arvados-cwl-runner (importing
4 # arvados_cwl module) inside a crunch job.
5 #
6 # This gets the job record, transforms the script parameters into a valid CWL
7 # input object, then executes the CWL runner to run the underlying workflow or
8 # tool.  When the workflow completes, record the output object in an output
9 # collection for this runner job.
10
11 import arvados
12 import arvados_cwl
13 import arvados.collection
14 import arvados.util
15 from cwltool.process import shortname
16 import cwltool.main
17 import logging
18 import os
19 import json
20 import argparse
21 from arvados.api import OrderedJsonModel
22 from cwltool.process import adjustFiles
23
# Print package versions
# NOTE(review): this is logged at INFO level on the root logger; whether it is
# actually emitted depends on logging configuration done elsewhere -- confirm.
logging.info(cwltool.main.versionstring())

# API client used to report this task's outcome (success or failure) below.
api = arvados.api("v1")

try:
    # Look the job record up once and reuse it for every field read below.
    job = arvados.current_job()
    job_order_object = job['script_parameters']

    def keeppath(v):
        """Turn a Keep locator into a file:// URL under the task's Keep mount.

        Values that do not match arvados.util.keep_locator_pattern are
        returned unchanged, so a non-Keep path is never replaced by None.
        """
        if arvados.util.keep_locator_pattern.match(v):
            return "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], v)
        return v

    job_order_object["cwl:tool"] = keeppath(job_order_object["cwl:tool"])

    # Presumably applies keeppath to every file path in the input object
    # (adjustFiles comes from cwltool.process) -- confirm.
    adjustFiles(job_order_object, keeppath)

    runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))

    t = cwltool.main.load_tool(job_order_object, False, True, runner.arvMakeTool, True)

    # Options handed to the executor in place of parsed command-line arguments.
    args = argparse.Namespace(
        project_uuid=job["owner_uuid"],
        enable_reuse=True,
        submit=False,
        debug=True,
        quiet=False,
        ignore_docker_for_reuse=False,
    )
    outputObj = runner.arvExecutor(t, job_order_object, "", args,
                                   cwl_runner_job={"uuid": job["uuid"],
                                                   "state": job["state"]})

    # Maps each collection named in the output to the set of paths seen in it.
    files = {}

    def capture(path):
        """Record which collection an output path refers to; return path as is."""
        sp = path.split("/")
        # Drops the first five characters of the leading path segment
        # (presumably a "keep:" prefix -- confirm) to get the collection id.
        col = sp[0][5:]
        files.setdefault(col, set()).add("/".join(sp[1:]))
        return path

    adjustFiles(outputObj, capture)

    final = arvados.collection.Collection()

    # Only the collection ids are needed here: every entry yielded by iterating
    # the source collection is copied, not just the captured paths.
    # NOTE(review): confirm that copying whole collections is intended.
    for k in files:
        with arvados.collection.Collection(k) as c:
            for f in c:
                final.copy(f, f, c, True)

    def makeRelative(path):
        """Strip the first "/"-separated segment from path."""
        return "/".join(path.split("/")[1:])

    adjustFiles(outputObj, makeRelative)

    # Store the rewritten output object inside the output collection itself.
    with final.open("cwl.output.json", "w") as f:
        json.dump(outputObj, f, indent=4)

    # Report success; the task output is whatever save_new() returns when
    # called with create_collection_record=False.
    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                           body={
                               'output': final.save_new(create_collection_record=False),
                               'success': True,
                               'progress': 1.0
                           }).execute()
except Exception:
    # Top-level boundary: log the traceback and mark the task as failed.
    logging.exception("Unhandled exception")
    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                           body={
                               'output': None,
                               'success': False,
                               'progress': 1.0
                           }).execute()