# 8442: import fixups
# [arvados.git] / sdk / cwl / arvados_cwl / runner.py
1 import os
2 import urlparse
3 from functools import partial
4 import logging
5 import json
6
7 from cwltool.draft2tool import CommandLineTool
8 import cwltool.workflow
9 from cwltool.process import get_feature, scandeps, adjustFiles
10 from cwltool.load_tool import fetch_document
11
12 import arvados.collection
13
14 from .arvdocker import arv_docker_get_image
15 from .pathmapper import ArvPathMapper
16
17 logger = logging.getLogger('arvados.cwl-runner')
18
class Runner(object):
    """Base class for submitting a CWL workflow to run on Arvados.

    Handles uploading the workflow document, its file dependencies, and
    referenced Docker images to Keep, and translating the final output
    record back into a CWL output object.
    """

    def __init__(self, runner, tool, job_order, enable_reuse):
        # runner: the coordinating ArvCwlRunner instance (provides .api,
        # .project_uuid, .output_callback, .jobs).
        self.arvrunner = runner
        # tool: the cwltool Process (CommandLineTool or Workflow) to run.
        self.tool = tool
        # job_order: the CWL input object for this run.
        self.job_order = job_order
        self.running = False
        # enable_reuse: whether Arvados may reuse results of past jobs.
        self.enable_reuse = enable_reuse

    def update_pipeline_component(self, record):
        """Hook for subclasses to update pipeline state; no-op here."""
        pass

    def upload_docker(self, tool):
        """Ensure Docker images referenced by `tool` are available in Keep.

        Recurses into workflow steps so every embedded tool's
        DockerRequirement image is uploaded.
        """
        if isinstance(tool, CommandLineTool):
            (docker_req, docker_is_req) = get_feature(tool, "DockerRequirement")
            if docker_req:
                arv_docker_get_image(self.arvrunner.api, docker_req, True,
                                     self.arvrunner.project_uuid)
        elif isinstance(tool, cwltool.workflow.Workflow):
            for s in tool.steps:
                self.upload_docker(s.embedded_tool)

    def arvados_job_spec(self, *args, **kwargs):
        """Upload the workflow and its dependencies to Keep.

        Scans the workflow document and the job order for file
        dependencies, uploads both sets via ArvPathMapper, and rewrites
        the job order's paths to their mapped Keep locations.

        Returns the ArvPathMapper covering the workflow files.
        """
        self.upload_docker(self.tool)

        workflowfiles = set()
        jobfiles = set()
        workflowfiles.add(self.tool.tool["id"])

        self.name = os.path.basename(self.tool.tool["id"])

        def visitFiles(files, path):
            # Collect each referenced path; return it unchanged so
            # adjustFiles leaves the documents intact at this stage.
            files.add(path)
            return path

        document_loader, workflowobj, uri = fetch_document(self.tool.tool["id"])

        def loadref(b, u):
            # Resolve $import/run references relative to their base URI.
            return document_loader.fetch(urlparse.urljoin(b, u))

        sc = scandeps(uri, workflowobj,
                      set(("$import", "run")),
                      set(("$include", "$schemas", "path")),
                      loadref)
        adjustFiles(sc, partial(visitFiles, workflowfiles))
        adjustFiles(self.job_order, partial(visitFiles, jobfiles))

        workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                       "%s",
                                       "%s/%s",
                                       name=self.name,
                                       **kwargs)

        jobmapper = ArvPathMapper(self.arvrunner, jobfiles, "",
                                  "%s",
                                  "%s/%s",
                                  name=os.path.basename(self.job_order.get("id", "#")),
                                  **kwargs)

        # Rewrite job order paths to their uploaded Keep locations.
        adjustFiles(self.job_order, lambda p: jobmapper.mapper(p)[1])

        # The document id refers to the pre-upload location; drop it so it
        # is not submitted as part of the input object.
        if "id" in self.job_order:
            del self.job_order["id"]

        return workflowmapper

    def done(self, record):
        """Process the finished runner record and report the result.

        Reads cwl.output.json from the output collection, rewrites
        collection-relative paths to absolute keep: URIs, and invokes the
        runner's output callback.  Always removes this job from
        arvrunner.jobs, even on error.
        """
        if record["state"] == "Complete":
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        outputs = None
        try:
            try:
                outc = arvados.collection.Collection(record["output"])
                with outc.open("cwl.output.json") as f:
                    outputs = json.load(f)

                def keepify(path):
                    # Paths already in keep: form pass through unchanged.
                    # (Previously this branch implicitly returned None,
                    # which adjustFiles would write back, clobbering any
                    # already-absolute keep: path in the output object.)
                    if path.startswith("keep:"):
                        return path
                    return "keep:%s/%s" % (record["output"], path)

                adjustFiles(outputs, keepify)
            except Exception as e:
                # Best effort: log and still invoke the callback with
                # whatever (possibly None) outputs we have.
                logger.error("While getting final output object: %s", e)
            self.arvrunner.output_callback(outputs, processStatus)
        finally:
            del self.arvrunner.jobs[record["uuid"]]