Merge branch '9964-output-glob-acr' refs #9964
[arvados.git] / sdk / cwl / arvados_cwl / arvtool.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from cwltool.command_line_tool import CommandLineTool, ExpressionTool
6 from .arvcontainer import ArvadosContainer
7 from .pathmapper import ArvPathMapper
8 from .runner import make_builder
9 from ._version import __version__
10 from functools import partial
11 from schema_salad.sourceline import SourceLine
12 from cwltool.errors import WorkflowException
13 from arvados.util import portable_data_hash_pattern
14 from cwltool.utils import aslist
15 from cwltool.builder import substitute
16
17 from typing import Sequence, Mapping
18
def validate_cluster_target(arvrunner, runtimeContext):
    """Check that the requested cluster and project are usable targets.

    Raises WorkflowException when:

    * ``runtimeContext.submit_runner_cluster`` is set but is neither a key
      of the API root description's "remoteHosts" nor equal to its
      "uuidPrefix";
    * ``runtimeContext.project_uuid`` is set but does not start with the
      target cluster id;
    * looking up ``runtimeContext.project_uuid`` through the API fails, or
      the group found is not of class "project".

    Returns None when every check passes (or when nothing was requested).
    """
    requested_cluster = runtimeContext.submit_runner_cluster
    if requested_cluster:
        root_desc = arvrunner.api._rootDesc
        if (requested_cluster not in root_desc["remoteHosts"]
                and requested_cluster != root_desc["uuidPrefix"]):
            known_remotes = ", ".join(root_desc["remoteHosts"].keys())
            raise WorkflowException(
                "Unknown or invalid cluster id '%s' known remote clusters are %s"
                % (requested_cluster, known_remotes))

    project_uuid = runtimeContext.project_uuid
    if not project_uuid:
        return

    # Without an explicit cluster the local cluster's prefix is the target.
    cluster_target = runtimeContext.submit_runner_cluster or arvrunner.api._rootDesc["uuidPrefix"]
    if not project_uuid.startswith(cluster_target):
        raise WorkflowException(
            "Project uuid '%s' should start with id of target cluster '%s'"
            % (project_uuid, cluster_target))

    try:
        if project_uuid[5:12] == '-tpzed-':
            # This uuid infix is looked up as a user rather than a group.
            arvrunner.api.users().get(uuid=project_uuid).execute()
        else:
            proj = arvrunner.api.groups().get(uuid=project_uuid).execute()
            group_class = proj["group_class"]
            if group_class != "project":
                raise Exception("not a project, group_class is '%s'" % (group_class))
    except Exception as e:
        # Any lookup failure, including the "not a project" error raised
        # just above, is reported as an invalid project uuid.
        raise WorkflowException("Invalid project uuid '%s': %s" % (runtimeContext.project_uuid, e))
39
def set_cluster_target(tool, arvrunner, builder, runtimeContext):
    """Apply a tool's ClusterTarget hint/requirement to the runtime context.

    Looks through ``tool["hints"]`` and then ``tool["requirements"]`` for
    entries of class "http://arvados.org/cwl#ClusterTarget".  If none is
    present, the given runtimeContext is returned untouched.  Otherwise a
    copy of the context is returned whose ``submit_runner_cluster`` and
    ``project_uuid`` are replaced by the evaluated "cluster_id" and
    "project_uuid" fields (falling back to the existing values when the
    evaluation result is falsy), after validating the new target.
    """
    target_class = "http://arvados.org/cwl#ClusterTarget"
    candidates = [
        entry
        for section in ("hints", "requirements")
        if section in tool
        for entry in tool[section]
        if entry["class"] == target_class
    ]
    if not candidates:
        return runtimeContext

    # "requirements" is scanned after "hints", so the last match wins.
    cluster_target_req = candidates[-1]

    with SourceLine(cluster_target_req, None, WorkflowException, runtimeContext.debug):
        updated = runtimeContext.copy()

        cluster_id = builder.do_eval(cluster_target_req.get("cluster_id"))
        updated.submit_runner_cluster = cluster_id or updated.submit_runner_cluster

        project_uuid = builder.do_eval(cluster_target_req.get("project_uuid"))
        updated.project_uuid = project_uuid or updated.project_uuid

        validate_cluster_target(arvrunner, updated)

    return updated
59
60
class ArvadosCommandTool(CommandLineTool):
    """Wrap cwltool CommandLineTool to override selected methods."""

    def __init__(self, arvrunner, toolpath_object, loadingContext):
        """Set up the tool, a default Docker hint, and its output globs.

        arvrunner is stored on the instance for use by the job runner and
        path mapper.  toolpath_object and loadingContext are passed to the
        cwltool base class unchanged; toolpath_object["outputs"] is also
        scanned to fill ``self.globpatterns``.
        """
        super(ArvadosCommandTool, self).__init__(toolpath_object, loadingContext)

        docker_req, _ = self.get_requirement("DockerRequirement")
        if not docker_req:
            # The tool names no Docker image: add a hint for the default
            # image.  A value matching the portable data hash pattern is
            # recorded as a collection PDH, anything else as dockerPull.
            default_image = loadingContext.default_docker_image
            if portable_data_hash_pattern.match(default_image):
                docker_hint = {"class": "DockerRequirement",
                               "http://arvados.org/cwl#dockerCollectionPDH": default_image}
            else:
                docker_hint = {"class": "DockerRequirement",
                               "dockerPull": default_image}
            self.hints.append(docker_hint)

        self.arvrunner = arvrunner
        self.globpatterns = []
        self._collect_globs(toolpath_object["outputs"])

    def _collect_globs(self, inputschema):
        """Recursively gather output glob patterns into ``self.globpatterns``.

        inputschema may be a string (ignored), a sequence (each element is
        visited), or a mapping.  For a mapping, its "type" is visited, the
        "fields" of a "record" type are visited, and every
        ``outputBinding.glob`` entry is appended.  For each secondaryFiles
        entry on a mapping that has a glob, either "**" is appended (when
        the pattern contains an expression) or the result of applying the
        pattern to each glob with cwltool's ``substitute``.
        """
        if isinstance(inputschema, str):
            # Must be checked first: str is itself a Sequence.
            return

        if isinstance(inputschema, Sequence):
            for i in inputschema:
                self._collect_globs(i)

        if isinstance(inputschema, Mapping):
            if "type" in inputschema:
                self._collect_globs(inputschema["type"])
                if inputschema["type"] == "record":
                    for field in inputschema["fields"]:
                        self._collect_globs(field)

            if "outputBinding" in inputschema and "glob" in inputschema["outputBinding"]:
                globs = aslist(inputschema["outputBinding"]["glob"])
                self.globpatterns.extend(globs)
                if "secondaryFiles" in inputschema:
                    for sf in aslist(inputschema["secondaryFiles"]):
                        pattern = sf["pattern"]
                        if "$(" in pattern or "${" in pattern:
                            # The pattern is an expression; presumably it
                            # cannot be resolved here, so match everything.
                            self.globpatterns.append("**")
                        else:
                            for gb in globs:
                                self.globpatterns.append(substitute(gb, pattern))

    def make_job_runner(self, runtimeContext):
        """Return a callable that builds ArvadosContainer jobs for this tool.

        The callable is ArvadosContainer partially applied to the runner,
        runtimeContext and the collected glob patterns.

        Raises Exception if ``runtimeContext.work_api`` is not "containers".
        """
        if runtimeContext.work_api == "containers":
            return partial(ArvadosContainer, self.arvrunner, runtimeContext, self.globpatterns)
        else:
            # Interpolate explicitly: passing the value as a second argument
            # to Exception leaves "%s" unformatted in the message.
            raise Exception("Unsupported work_api %s" % (runtimeContext.work_api,))

    def make_path_mapper(self, reffiles, stagedir, runtimeContext, separateDirs):
        """Return an ArvPathMapper for reffiles plus the context's extra_reffiles.

        stagedir and separateDirs are accepted for interface compatibility
        and are not used.  Returns None when ``runtimeContext.work_api`` is
        not "containers".
        """
        if runtimeContext.work_api == "containers":
            return ArvPathMapper(self.arvrunner, reffiles+runtimeContext.extra_reffiles, runtimeContext.basedir,
                                 "/keep/%s",
                                 "/keep/%s/%s")
        return None

    def job(self, joborder, output_callback, runtimeContext):
        """Prepare the runtime context and delegate to CommandLineTool.job.

        Applies any ClusterTarget hint/requirement of the tool and, for the
        "containers" work_api, sets both ``outdir`` and ``docker_outdir`` on
        the context to the DockerRequirement's dockerOutputDirectory, or to
        "/var/spool/cwl" when none is given.
        """
        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
        runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)

        if runtimeContext.work_api == "containers":
            docker_req, _ = self.get_requirement("DockerRequirement")
            outdir = (docker_req and docker_req.get("dockerOutputDirectory")) or "/var/spool/cwl"
            runtimeContext.outdir = outdir
            runtimeContext.docker_outdir = outdir
        return super(ArvadosCommandTool, self).job(joborder, output_callback, runtimeContext)
133
class ArvadosExpressionTool(ExpressionTool):
    """ExpressionTool whose output callback is wrapped by the Arvados runner."""

    def __init__(self, arvrunner, toolpath_object, loadingContext):
        """Initialize the base ExpressionTool and keep a reference to arvrunner."""
        super(ArvadosExpressionTool, self).__init__(toolpath_object, loadingContext)
        self.arvrunner = arvrunner

    def job(self,
            job_order,         # type: Mapping[Text, Text]
            output_callback,  # type: Callable[[Any, Any], Any]
            runtimeContext     # type: RuntimeContext
           ):
        """Run the base-class job with output_callback wrapped by the runner.

        The callback handed to ExpressionTool.job is whatever
        ``self.arvrunner.get_wrapped_callback(output_callback)`` returns.
        """
        base_job = super(ArvadosExpressionTool, self).job
        wrapped_callback = self.arvrunner.get_wrapped_callback(output_callback)
        return base_job(job_order, wrapped_callback, runtimeContext)