10129: Trim listing from Directory objects that are keep references, because
[arvados.git] / sdk / cwl / arvados_cwl / arvworkflow.py
1 import os
2 import json
3 import copy
4 import logging
5
6 from cwltool.pack import pack
7 from cwltool.load_tool import fetch_document
8 from cwltool.process import shortname
9 from cwltool.workflow import Workflow, WorkflowException
10 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
11
12 import ruamel.yaml as yaml
13
14 from .runner import upload_docker, upload_dependencies, del_listing
15 from .arvtool import ArvadosCommandTool
16
17 logger = logging.getLogger('arvados.cwl-runner')
18
def upload_workflow(arvRunner, tool, job_order, project_uuid, update_uuid):
    """Pack a workflow and store it as an Arvados workflow record.

    The tool's document is packed into a single definition, values from
    job_order are recorded as defaults on the matching "#main" inputs, the
    dependencies of the packed document are uploaded, and the YAML dump of
    the result is saved through the workflows API.

    arvRunner    -- runner providing `api` and `num_retries`
    tool         -- loaded tool exposing `tool`, `doc_loader` and `metadata`
    job_order    -- input object; modified in place (del_listing is applied
                    to each of its Directory objects)
    project_uuid -- owner_uuid to set on the workflow record
    update_uuid  -- if truthy, the existing record with this uuid is updated;
                    otherwise a new record is created

    Returns the uuid of the created or updated workflow record.  Raises
    IndexError if the packed "$graph" has no entry with id "#main".
    """
    upload_docker(arvRunner, tool)

    tool_id = tool.tool["id"]
    loader = tool.doc_loader
    packed = pack(loader, loader.fetch(tool_id), tool_id, tool.metadata)

    # Runs before the values are copied into the packed defaults, so the
    # defaults receive the already-adjusted objects.
    adjustDirObjs(job_order, del_listing)

    main_process = [proc for proc in packed["$graph"] if proc["id"] == "#main"][0]
    for param in main_process["inputs"]:
        key = shortname(param["id"])
        if key not in job_order:
            continue
        param["default"] = job_order[key]

    name = os.path.basename(tool_id)
    upload_dependencies(arvRunner, name, loader, packed, tool_id, False)

    record = {
        "owner_uuid": project_uuid,
        "name": tool.tool.get("label", name),
        "description": tool.tool.get("doc", ""),
        "definition": yaml.safe_dump(packed),
    }
    body = {"workflow": record}

    workflows = arvRunner.api.workflows()
    if update_uuid:
        request = workflows.update(uuid=update_uuid, body=body)
    else:
        request = workflows.create(body=body)
    return request.execute(num_retries=arvRunner.num_retries)["uuid"]
50
class ArvadosWorkflow(Workflow):
    """Wrap cwltool Workflow to override selected methods."""

    def __init__(self, arvrunner, toolpath_object, **kwargs):
        """Initialize the underlying Workflow and remember the runner.

        arvrunner       -- runner object handed on to ArvadosCommandTool
        toolpath_object -- workflow document passed to the base class
        kwargs          -- forwarded to the base class; must include
                           "work_api" (KeyError otherwise)
        """
        super(ArvadosWorkflow, self).__init__(toolpath_object, **kwargs)
        self.arvrunner = arvrunner
        self.work_api = kwargs["work_api"]

    def job(self, joborder, output_callback, **kwargs):
        """Create the job(s) for this workflow.

        Without the http://arvados.org/cwl#RunInSingleContainer requirement
        this defers to the base class.  With it, the workflow is packed and
        wrapped in a synthetic CommandLineTool that runs
        "cwltool --no-container --move-outputs workflow.cwl#main cwl.input.yml",
        with the packed workflow and the input object staged through
        InitialWorkDirRequirement.

        joborder        -- input object for this run (a deep copy is rewritten
                           for the staged cwl.input.yml; upload_dependencies
                           is called on the original)
        output_callback -- passed through to the job that is created
        kwargs          -- passed through; "work_api" is always overridden,
                           and "loader"/"avsc_names" are set on the
                           single-container path

        Raises WorkflowException if a File or Directory location is neither
        a "keep:" reference nor a "_:" literal.
        """
        kwargs["work_api"] = self.work_api
        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
        if not req:
            return super(ArvadosWorkflow, self).job(joborder, output_callback, **kwargs)

        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])

        # NOTE(review): workflowobj is the object returned by the loader and
        # is modified in place; presumably pack() relies on that.  Confirm
        # whether repeated job() calls accumulate duplicate entries.
        workflowobj["requirements"] = self.requirements + workflowobj.get("requirements", [])
        workflowobj["hints"] = self.hints + workflowobj.get("hints", [])
        packed = pack(document_loader, workflowobj, uri, self.metadata)

        upload_dependencies(self.arvrunner,
                            kwargs.get("name", ""),
                            document_loader,
                            packed,
                            uri,
                            False)

        upload_dependencies(self.arvrunner,
                            os.path.basename(joborder.get("id", "#")),
                            document_loader,
                            joborder,
                            joborder.get("id", "#"),
                            False)

        # Rewrite a copy so the joborder handed to the created job keeps its
        # original locations.
        joborder_keepmount = copy.deepcopy(joborder)

        def keepmount(obj):
            """Rewrite a File/Directory "keep:" location to a /keep/ path."""
            if obj["location"].startswith("keep:"):
                obj["location"] = "/keep/" + obj["location"][5:]
                # Only keep references lose their listing.  A "_:" literal
                # is left intact because its listing is the only record of
                # its contents, and the entries inside it still need their
                # own locations rewritten.
                if "listing" in obj:
                    del obj["listing"]
            elif obj["location"].startswith("_:"):
                pass
            else:
                raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

        adjustFileObjs(joborder_keepmount, keepmount)
        adjustDirObjs(joborder_keepmount, keepmount)
        adjustFileObjs(packed, keepmount)
        adjustDirObjs(packed, keepmount)

        def escape_entry(text):
            """Backslash-escape backslashes and the "$(" / "${" openers."""
            # Backslashes are doubled first so the escapes added for "$(" and
            # "${" are not themselves doubled.
            return text.replace("\\", "\\\\").replace("$(", "\\$(").replace("${", "\\${")

        wf_runner = {
            "class": "CommandLineTool",
            "baseCommand": "cwltool",
            "inputs": self.tool["inputs"],
            "outputs": self.tool["outputs"],
            "stdout": "cwl.output.json",
            "requirements": workflowobj["requirements"]+[
                {"class": "InlineJavascriptRequirement"},
                {
                "class": "InitialWorkDirRequirement",
                "listing": [{
                        "entryname": "workflow.cwl",
                        "entry": escape_entry(yaml.safe_dump(packed))
                    }, {
                        "entryname": "cwl.input.yml",
                        "entry": escape_entry(yaml.safe_dump(joborder_keepmount))
                    }]
            }],
            "hints": workflowobj["hints"],
            "arguments": ["--no-container", "--move-outputs", "workflow.cwl#main", "cwl.input.yml"]
        }
        kwargs["loader"] = self.doc_loader
        kwargs["avsc_names"] = self.doc_schema
        return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder, output_callback, **kwargs)