14198: Support expressions in ClusterTarget clusterID / ownerUUID
author     Peter Amstutz <pamstutz@veritasgenetics.com>
           Thu, 25 Oct 2018 20:14:50 +0000 (16:14 -0400)
committer  Peter Amstutz <pamstutz@veritasgenetics.com>
           Wed, 21 Nov 2018 18:05:25 +0000 (13:05 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/context.py
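
Previously the http://arvados.org/cwl#ClusterTarget hint had its clusterID and ownerUUID fields read as literal strings at submission time, inside ArvadosContainer.run(). With this commit the fields may be CWL parameter references: they are evaluated once per requirement instance at job-construction time, cached in a ClusterTarget namedtuple on the runtime context, and the container runner consumes the pre-evaluated values. A minimal standalone sketch of the evaluate-once pattern (the function and the do_eval stand-in are illustrative, not the real API):

    from collections import namedtuple

    ClusterTarget = namedtuple("ClusterTarget", ("instance", "cluster_id", "owner_uuid"))

    def resolve_cluster_target(req, do_eval, cached):
        # do_eval stands in for cwltool's Builder.do_eval: "$(...)" strings
        # are evaluated against the job order, plain strings pass through.
        if req is None:
            return cached
        if cached is None or cached.instance != id(req):
            # New (or different) hint object: evaluate once and cache,
            # keyed on the identity of the requirement dict.
            return ClusterTarget(id(req),
                                 do_eval(req.get("clusterID")),
                                 do_eval(req.get("ownerUUID")))
        return cached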

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index d9466a83a0fb8a6f07188cdbf11bbbd1276fc1ce..823b41ce995410a016cd61c155574d52fcc6e5f6 100644
@@ -36,7 +36,7 @@ metrics = logging.getLogger('arvados.cwl-runner.metrics')
 class ArvadosContainer(JobBase):
     """Submit and manage a Crunch container request for executing a CWL CommandLineTool."""
 
-    def __init__(self, runner,
+    def __init__(self, runner, cluster_target,
                  builder,   # type: Builder
                  joborder,  # type: Dict[Text, Union[Dict[Text, Any], List, Text]]
                  make_path_mapper,  # type: Callable[..., PathMapper]
@@ -46,6 +46,7 @@ class ArvadosContainer(JobBase):
     ):
         super(ArvadosContainer, self).__init__(builder, joborder, make_path_mapper, requirements, hints, name)
         self.arvrunner = runner
+        self.cluster_target = cluster_target
         self.running = False
         self.uuid = None
 
@@ -251,13 +252,11 @@ class ArvadosContainer(JobBase):
             scheduling_parameters["max_run_time"] = self.timelimit
 
         extra_submit_params = {}
-        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
-        if cluster_target_req:
-            cluster_id = cluster_target_req.get("clusterID")
-            if cluster_id:
-                extra_submit_params["cluster_id"] = cluster_id
-            if cluster_target_req.get("ownerUUID"):
-                container_request["owner_uuid"] = cluster_target_req.get("ownerUUID")
+        if self.cluster_target is not None:
+            if self.cluster_target.cluster_id:
+                extra_submit_params["cluster_id"] = self.cluster_target.cluster_id
+            if self.cluster_target.owner_uuid:
+                container_request["owner_uuid"] = self.cluster_target.owner_uuid
 
         container_request["output_name"] = "Output for step %s" % (self.name)
         container_request["output_ttl"] = self.output_ttl
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index 119acc30392ceb9f124a6d0101c0868beeb6c1ae..e0997db5bbb4fbeade9c1d324dbd30c9462db911 100644
@@ -6,6 +6,7 @@ from cwltool.command_line_tool import CommandLineTool
 from .arvjob import ArvadosJob
 from .arvcontainer import ArvadosContainer
 from .pathmapper import ArvPathMapper
+from .context import ClusterTarget
 from functools import partial
 
 class ArvadosCommandTool(CommandLineTool):
@@ -17,7 +18,7 @@ class ArvadosCommandTool(CommandLineTool):
 
     def make_job_runner(self, runtimeContext):
         if runtimeContext.work_api == "containers":
-            return partial(ArvadosContainer, self.arvrunner)
+            return partial(ArvadosContainer, self.arvrunner, runtimeContext.cluster_target)
         elif runtimeContext.work_api == "jobs":
             return partial(ArvadosJob, self.arvrunner)
         else:
@@ -44,6 +45,12 @@ class ArvadosCommandTool(CommandLineTool):
 
         runtimeContext = runtimeContext.copy()
 
+        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
+        if cluster_target_req and (runtimeContext.cluster_target is None or runtimeContext.cluster_target.instance != id(cluster_target_req)):
+            runtimeContext.cluster_target = ClusterTarget(id(cluster_target_req),
+                                                          builder.do_eval(cluster_target_req.get("clusterID")),
+                                                          builder.do_eval(cluster_target_req.get("ownerUUID")))
+
         if runtimeContext.work_api == "containers":
             dockerReq, is_req = self.get_requirement("DockerRequirement")
             if dockerReq and dockerReq.get("dockerOutputDirectory"):
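
make_job_runner() pre-binds both the runner and the evaluated cluster target, so cwltool can instantiate the job with its usual positional arguments while ArvadosContainer.__init__ transparently receives cluster_target. A small self-contained illustration of that functools.partial pattern (stand-in names, not the real signatures):

    from functools import partial

    def make_container(runner, cluster_target, builder):  # stand-in signature
        return {"runner": runner, "target": cluster_target, "builder": builder}

    # Pre-bind the Arvados-specific arguments; the caller supplies the rest.
    job_runner = partial(make_container, "arvrunner", "clsr2")
    print(job_runner("builder"))
    # {'runner': 'arvrunner', 'target': 'clsr2', 'builder': 'builder'}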
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index ae90625102ff155cd67daa44d4ab4384aa996866..f86641bfd60bdeeb3e663036ee641d7e7c9499f9 100644
@@ -131,158 +131,173 @@ class ArvadosWorkflow(Workflow):
         self.loadingContext = loadingContext
 
     def job(self, joborder, output_callback, runtimeContext):
+
+        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
+        if cluster_target_req and (runtimeContext.cluster_target is None or runtimeContext.cluster_target.instance != id(cluster_target_req)):
+            # Evaluate clusterID/ownerUUID, which may be expressions, against
+            # the current job order; mirrors the Builder construction below.
+            builder = Builder(joborder,
+                              requirements=self.requirements,
+                              hints=self.hints,
+                              resources={})
+            runtimeContext.cluster_target = ClusterTarget(id(cluster_target_req),
+                                                          builder.do_eval(cluster_target_req.get("clusterID")),
+                                                          builder.do_eval(cluster_target_req.get("ownerUUID")))
+
         req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
-        if req:
-            with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                if "id" not in self.tool:
-                    raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
-            document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
+        if not req:
+            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
 
-            discover_secondary_files(self.tool["inputs"], joborder)
+        # RunInSingleContainer is in effect: run the packed subworkflow with cwltool inside one container
 
-            with Perf(metrics, "subworkflow upload_deps"):
-                upload_dependencies(self.arvrunner,
-                                    os.path.basename(joborder.get("id", "#")),
-                                    document_loader,
-                                    joborder,
-                                    joborder.get("id", "#"),
-                                    False)
+        with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+            if "id" not in self.tool:
+                raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
+        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
+
+        discover_secondary_files(self.tool["inputs"], joborder)
+
+        with Perf(metrics, "subworkflow upload_deps"):
+            upload_dependencies(self.arvrunner,
+                                os.path.basename(joborder.get("id", "#")),
+                                document_loader,
+                                joborder,
+                                joborder.get("id", "#"),
+                                False)
+
+            if self.wf_pdh is None:
+                workflowobj["requirements"] = dedup_reqs(self.requirements)
+                workflowobj["hints"] = dedup_reqs(self.hints)
+
+                packed = pack(document_loader, workflowobj, uri, self.metadata)
 
-                if self.wf_pdh is None:
-                    workflowobj["requirements"] = dedup_reqs(self.requirements)
-                    workflowobj["hints"] = dedup_reqs(self.hints)
-
-                    packed = pack(document_loader, workflowobj, uri, self.metadata)
-
-                    builder = Builder(joborder,
-                                      requirements=workflowobj["requirements"],
-                                      hints=workflowobj["hints"],
-                                      resources={})
-
-                    def visit(item):
-                        for t in ("hints", "requirements"):
-                            if t not in item:
-                                continue
-                            for req in item[t]:
-                                if req["class"] == "ResourceRequirement":
-                                    dyn = False
-                                    for k in max_res_pars + sum_res_pars:
-                                        if k in req:
-                                            if isinstance(req[k], basestring):
-                                                if item["id"] == "#main":
-                                                    # only the top-level requirements/hints may contain expressions
-                                                    self.dynamic_resource_req.append(req)
-                                                    dyn = True
-                                                    break
-                                                else:
-                                                    with SourceLine(req, k, WorkflowException):
-                                                        raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
-                                    if not dyn:
-                                        self.static_resource_req.append(req)
-
-                    visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
-
-                    if self.static_resource_req:
-                        self.static_resource_req = [get_overall_res_req(self.static_resource_req)]
-
-                    upload_dependencies(self.arvrunner,
-                                        runtimeContext.name,
-                                        document_loader,
-                                        packed,
-                                        uri,
-                                        False)
-
-                    # Discover files/directories referenced by the
-                    # workflow (mainly "default" values)
-                    visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)
-
-
-            if self.dynamic_resource_req:
                 builder = Builder(joborder,
-                                  requirements=self.requirements,
-                                  hints=self.hints,
+                                  requirements=workflowobj["requirements"],
+                                  hints=workflowobj["hints"],
                                   resources={})
 
-                # Evaluate dynamic resource requirements using current builder
-                rs = copy.copy(self.static_resource_req)
-                for dyn_rs in self.dynamic_resource_req:
-                    eval_req = {"class": "ResourceRequirement"}
-                    for a in max_res_pars + sum_res_pars:
-                        if a in dyn_rs:
-                            eval_req[a] = builder.do_eval(dyn_rs[a])
-                    rs.append(eval_req)
-                job_res_reqs = [get_overall_res_req(rs)]
-            else:
-                job_res_reqs = self.static_resource_req
-
-            with Perf(metrics, "subworkflow adjust"):
-                joborder_resolved = copy.deepcopy(joborder)
-                joborder_keepmount = copy.deepcopy(joborder)
-
-                reffiles = []
-                visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)
-
-                mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
-                                       "/keep/%s",
-                                       "/keep/%s/%s")
-
-                # For containers API, we need to make sure any extra
-                # referenced files (ie referenced by the workflow but
-                # not in the inputs) are included in the mounts.
-                if self.wf_reffiles:
-                    runtimeContext = runtimeContext.copy()
-                    runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)
-
-                def keepmount(obj):
-                    remove_redundant_fields(obj)
-                    with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                        if "location" not in obj:
-                            raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
-                    with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                        if obj["location"].startswith("keep:"):
-                            obj["location"] = mapper.mapper(obj["location"]).target
-                            if "listing" in obj:
-                                del obj["listing"]
-                        elif obj["location"].startswith("_:"):
-                            del obj["location"]
-                        else:
-                            raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
-
-                visit_class(joborder_keepmount, ("File", "Directory"), keepmount)
-
-                def resolved(obj):
-                    if obj["location"].startswith("keep:"):
-                        obj["location"] = mapper.mapper(obj["location"]).resolved
-
-                visit_class(joborder_resolved, ("File", "Directory"), resolved)
-
-                if self.wf_pdh is None:
-                    adjustFileObjs(packed, keepmount)
-                    adjustDirObjs(packed, keepmount)
-                    self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)
-
-            wf_runner = cmap({
-                "class": "CommandLineTool",
-                "baseCommand": "cwltool",
-                "inputs": self.tool["inputs"],
-                "outputs": self.tool["outputs"],
-                "stdout": "cwl.output.json",
-                "requirements": self.requirements+job_res_reqs+[
-                    {"class": "InlineJavascriptRequirement"},
-                    {
-                    "class": "InitialWorkDirRequirement",
-                    "listing": [{
-                            "entryname": "workflow.cwl",
-                            "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
-                        }, {
-                            "entryname": "cwl.input.yml",
-                            "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
-                        }]
-                }],
-                "hints": self.hints,
-                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
-                "id": "#"
-            })
-            return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
+                def visit(item):
+                    for t in ("hints", "requirements"):
+                        if t not in item:
+                            continue
+                        for req in item[t]:
+                            if req["class"] == "ResourceRequirement":
+                                dyn = False
+                                for k in max_res_pars + sum_res_pars:
+                                    if k in req:
+                                        if isinstance(req[k], basestring):
+                                            if item["id"] == "#main":
+                                                # only the top-level requirements/hints may contain expressions
+                                                self.dynamic_resource_req.append(req)
+                                                dyn = True
+                                                break
+                                            else:
+                                                with SourceLine(req, k, WorkflowException):
+                                                    raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
+                                if not dyn:
+                                    self.static_resource_req.append(req)
+
+                visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
+
+                if self.static_resource_req:
+                    self.static_resource_req = [get_overall_res_req(self.static_resource_req)]
+
+                upload_dependencies(self.arvrunner,
+                                    runtimeContext.name,
+                                    document_loader,
+                                    packed,
+                                    uri,
+                                    False)
+
+                # Discover files/directories referenced by the
+                # workflow (mainly "default" values)
+                visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)
+
+
+        if self.dynamic_resource_req:
+            builder = Builder(joborder,
+                              requirements=self.requirements,
+                              hints=self.hints,
+                              resources={})
+
+            # Evaluate dynamic resource requirements using current builder
+            rs = copy.copy(self.static_resource_req)
+            for dyn_rs in self.dynamic_resource_req:
+                eval_req = {"class": "ResourceRequirement"}
+                for a in max_res_pars + sum_res_pars:
+                    if a in dyn_rs:
+                        eval_req[a] = builder.do_eval(dyn_rs[a])
+                rs.append(eval_req)
+            job_res_reqs = [get_overall_res_req(rs)]
         else:
-            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
+            job_res_reqs = self.static_resource_req
+
+        with Perf(metrics, "subworkflow adjust"):
+            joborder_resolved = copy.deepcopy(joborder)
+            joborder_keepmount = copy.deepcopy(joborder)
+
+            reffiles = []
+            visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)
+
+            mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
+                                   "/keep/%s",
+                                   "/keep/%s/%s")
+
+            # For containers API, we need to make sure any extra
+            # referenced files (ie referenced by the workflow but
+            # not in the inputs) are included in the mounts.
+            if self.wf_reffiles:
+                runtimeContext = runtimeContext.copy()
+                runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)
+
+            def keepmount(obj):
+                remove_redundant_fields(obj)
+                with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+                    if "location" not in obj:
+                        raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
+                with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+                    if obj["location"].startswith("keep:"):
+                        obj["location"] = mapper.mapper(obj["location"]).target
+                        if "listing" in obj:
+                            del obj["listing"]
+                    elif obj["location"].startswith("_:"):
+                        del obj["location"]
+                    else:
+                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
+
+            visit_class(joborder_keepmount, ("File", "Directory"), keepmount)
+
+            def resolved(obj):
+                if obj["location"].startswith("keep:"):
+                    obj["location"] = mapper.mapper(obj["location"]).resolved
+
+            visit_class(joborder_resolved, ("File", "Directory"), resolved)
+
+            if self.wf_pdh is None:
+                adjustFileObjs(packed, keepmount)
+                adjustDirObjs(packed, keepmount)
+                self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)
+
+        wf_runner = cmap({
+            "class": "CommandLineTool",
+            "baseCommand": "cwltool",
+            "inputs": self.tool["inputs"],
+            "outputs": self.tool["outputs"],
+            "stdout": "cwl.output.json",
+            "requirements": self.requirements+job_res_reqs+[
+                {"class": "InlineJavascriptRequirement"},
+                {
+                "class": "InitialWorkDirRequirement",
+                "listing": [{
+                        "entryname": "workflow.cwl",
+                        "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
+                    }, {
+                        "entryname": "cwl.input.yml",
+                        "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+                    }]
+            }],
+            "hints": self.hints,
+            "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
+            "id": "#"
+        })
+        return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
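
One subtlety in the generated wrapper above: cwl.input.yml is materialized through InitialWorkDirRequirement, whose "entry" strings are themselves subject to expression interpolation, so literal $() and ${} occurring in input values must be neutralized before embedding. A runnable illustration of that escaping step:

    import json

    joborder = {"pattern": "$(inputs.x)", "path": "C:\\data"}
    entry = (json.dumps(joborder, indent=2, sort_keys=True, separators=(',', ': '))
             .replace("\\", "\\\\")   # double existing backslashes first
             .replace('$(', '\\$(')   # then defuse parameter references
             .replace('${', '\\${'))  # and JavaScript expression blocks
    print(entry)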
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 4e1334c1c130ab2e3822ce2f8e14093ad662c254..23e7b91a0eef67aa30f3eb5d14578d0c0b310fd2 100644
@@ -3,11 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from cwltool.context import LoadingContext, RuntimeContext
+from collections import namedtuple
 
 class ArvLoadingContext(LoadingContext):
     def __init__(self, kwargs=None):
         super(ArvLoadingContext, self).__init__(kwargs)
 
+ClusterTarget = namedtuple("ClusterTarget", ("instance", "cluster_id", "owner_uuid"))
+
 class ArvRuntimeContext(RuntimeContext):
     def __init__(self, kwargs=None):
         self.work_api = None
@@ -31,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.current_container = None
         self.http_timeout = 300
         self.submit_runner_cluster = None
+        self.cluster_target = None
 
         super(ArvRuntimeContext, self).__init__(kwargs)
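
Usage sketch for the new field, assuming cwltool's RuntimeContext kwargs/copy semantics (the hint dict and UUID below are made-up examples): cluster_target defaults to None, meaning "submit locally to the default project" until a ClusterTarget hint is evaluated.

    ctx = ArvRuntimeContext({"work_api": "containers"})
    assert ctx.cluster_target is None

    hint = {"clusterID": "clsr2", "ownerUUID": "clsr2-j7d0g-0123456789abcde"}
    ctx = ctx.copy()
    ctx.cluster_target = ClusterTarget(instance=id(hint),        # cache key
                                       cluster_id=hint["clusterID"],
                                       owner_uuid=hint["ownerUUID"])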