8442: Tweak internal handling of keep: paths, examine exit codes to determine
[arvados.git] / sdk / cwl / arvados_cwl / runner.py
1 import os
2 import urlparse
3 from functools import partial
4 import logging
5 import json
6
7 from cwltool.draft2tool import CommandLineTool
8 import cwltool.workflow
9 from cwltool.process import get_feature, scandeps, adjustFiles, UnsupportedRequirement
10 from cwltool.load_tool import fetch_document
11
12 import arvados.collection
13
14 from .arvdocker import arv_docker_get_image
15 from .pathmapper import ArvPathMapper
16
logger = logging.getLogger('arvados.cwl-runner')


def _process_status(record):
    """Derive the status string reported for a finished job record.

    A record whose "state" is not "Complete" is a permanent failure.  For a
    completed record, a missing (or None) "exit_code" and an exit code of 0
    both count as success, exit code 33 is reported as
    "UnsupportedRequirement", and any other exit code is a permanent failure.
    """
    if record["state"] != "Complete":
        return "permanentFail"

    exit_code = record.get("exit_code")
    if exit_code is None:
        return "success"
    if exit_code == 33:
        return "UnsupportedRequirement"
    if exit_code == 0:
        return "success"
    return "permanentFail"


class Runner(object):
    """Wraps a tool and its job order for execution through ``runner``.

    The instance keeps a reference to the owning runner (``arvrunner``), the
    tool object, the job order mapping, a ``running`` flag (initially False)
    and the ``enable_reuse`` setting passed by the caller.
    """

    def __init__(self, runner, tool, job_order, enable_reuse):
        """Store the collaborators; no other work is done at construction."""
        self.arvrunner = runner
        self.tool = tool
        self.job_order = job_order
        self.enable_reuse = enable_reuse
        self.running = False

    def update_pipeline_component(self, record):
        """Accept a record and do nothing; always returns None."""
        return None

    def upload_docker(self, tool):
        """Call ``arv_docker_get_image`` for every Docker requirement in ``tool``.

        A CommandLineTool is handled directly when it declares a
        "DockerRequirement"; a Workflow is walked recursively through the
        ``embedded_tool`` of each of its steps.  Any other object is ignored.
        """
        if isinstance(tool, CommandLineTool):
            # get_feature returns a pair; only the requirement itself is used.
            docker_req, _unused = get_feature(tool, "DockerRequirement")
            if docker_req:
                arv_docker_get_image(self.arvrunner.api,
                                     docker_req,
                                     True,
                                     self.arvrunner.project_uuid)
            return

        if isinstance(tool, cwltool.workflow.Workflow):
            for step in tool.steps:
                self.upload_docker(step.embedded_tool)

    def arvados_job_spec(self, *args, **kwargs):
        """Map workflow and job-order file references and return the workflow mapper.

        Steps, in order:
          1. run ``upload_docker`` on the tool;
          2. gather the paths referenced by the workflow document (found with
             ``scandeps``) and by the job order;
          3. build one ``ArvPathMapper`` for each of the two path sets, using
             the optional ``keepprefix`` keyword as a prefix of both patterns;
          4. rewrite the paths in ``self.job_order`` in place through the job
             mapper and drop its "id" entry.

        Sets ``self.name`` to the basename of the tool id.  All keyword
        arguments are forwarded to both ``ArvPathMapper`` constructors.

        Returns the ``ArvPathMapper`` built for the workflow files.
        """
        self.upload_docker(self.tool)

        tool_id = self.tool.tool["id"]
        workflowfiles = set()
        jobfiles = set()
        workflowfiles.add(tool_id)

        self.name = os.path.basename(tool_id)

        def collector(bucket):
            # Build a visitor that records each path in `bucket` and hands the
            # path back unchanged, so adjustFiles leaves the document as is.
            def visit(path):
                bucket.add(path)
                return path
            return visit

        document_loader, workflowobj, uri = fetch_document(tool_id)

        def loadref(base, ref):
            # Resolve `ref` against `base` before fetching it.
            return document_loader.fetch(urlparse.urljoin(base, ref))

        ref_fields = set(["$import", "run"])
        url_fields = set(["$include", "$schemas", "path"])
        sc = scandeps(uri, workflowobj, ref_fields, url_fields, loadref)

        adjustFiles(sc, collector(workflowfiles))
        adjustFiles(self.job_order, collector(jobfiles))

        keepprefix = kwargs.get("keepprefix", "")
        one_part_pattern = keepprefix + "%s"
        two_part_pattern = keepprefix + "%s/%s"

        # The workflow mapper is constructed before the job mapper, as in the
        # original statement order.
        workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                       one_part_pattern,
                                       two_part_pattern,
                                       name=self.name,
                                       **kwargs)

        job_name = os.path.basename(self.job_order.get("id", "#"))
        jobmapper = ArvPathMapper(self.arvrunner, jobfiles, "",
                                  one_part_pattern,
                                  two_part_pattern,
                                  name=job_name,
                                  **kwargs)

        def mapped_target(path):
            # Element 1 of the mapper entry replaces the original path.
            return jobmapper.mapper(path)[1]

        adjustFiles(self.job_order, mapped_target)

        # The job order must not carry its "id" entry any further.
        self.job_order.pop("id", None)

        return workflowmapper

    def done(self, record):
        """Report the outcome of a finished job to the owning runner.

        The status is derived from the record's "state" and "exit_code".  The
        output object is read from "cwl.output.json" inside the collection
        named by ``record["output"]``; every path in it that does not already
        start with "keep:" is rewritten to "keep:<output>/<path>".  Any error
        while obtaining the output object is logged and does not prevent the
        callback from being invoked (``outputs`` stays None if nothing was
        loaded).

        ``self.arvrunner.output_callback(outputs, status)`` is then called,
        and the entry for ``record["uuid"]`` is always removed from
        ``self.arvrunner.jobs`` afterwards, even if the callback raises.
        """
        processStatus = _process_status(record)

        outputs = None
        try:
            try:
                output_ref = record["output"]

                def keepify(path):
                    if path.startswith("keep:"):
                        return path
                    return "keep:%s/%s" % (output_ref, path)

                outc = arvados.collection.Collection(output_ref)
                with outc.open("cwl.output.json") as f:
                    outputs = json.load(f)
                adjustFiles(outputs, keepify)
            except Exception as e:
                logger.error("While getting final output object: %s", e)
            self.arvrunner.output_callback(outputs, processStatus)
        finally:
            del self.arvrunner.jobs[record["uuid"]]