Merge branch 'master' into 7478-anm-spot-instances
[arvados.git] / sdk / cwl / arvados_cwl / arvworkflow.py
index 79bfacd28a10daad1393bf090f2350e7c166244c..6f731fd6877b18fc6bc434bd8110fe2b44775196 100644 (file)
@@ -14,6 +14,7 @@ from cwltool.load_tool import fetch_document
 from cwltool.process import shortname
 from cwltool.workflow import Workflow, WorkflowException
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
+from cwltool.builder import Builder
 
 import ruamel.yaml as yaml
 
@@ -26,10 +27,13 @@ from .perf import Perf
 logger = logging.getLogger('arvados.cwl-runner')
 metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
+max_res_pars = ("coresMin", "coresMax", "ramMin", "ramMax", "tmpdirMin", "tmpdirMax")
+sum_res_pars = ("outdirMin", "outdirMax")
+
 def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None,
-                    submit_runner_ram=0, name=None):
+                    submit_runner_ram=0, name=None, merged_map=None):
 
-    packed = packed_workflow(arvRunner, tool)
+    packed = packed_workflow(arvRunner, tool, merged_map)
 
     adjustDirObjs(job_order, trim_listing)
     adjustFileObjs(job_order, trim_anonymous_location)
@@ -71,6 +75,37 @@ def dedup_reqs(reqs):
             dedup[r["class"]] = r
     return [dedup[r] for r in sorted(dedup.keys())]
 
+def get_overall_res_req(res_reqs):
+    """Combine a list of ResourceRequirement mappings into a single one.
+
+    Parameters named in max_res_pars (coresMin, coresMax, ramMin, ramMax,
+    tmpdirMin, tmpdirMax) are combined by taking the maximum over every
+    requirement that sets them; parameters named in sum_res_pars
+    (outdirMin, outdirMax) are combined by summing.  A parameter that no
+    requirement sets is left out of the result.
+
+    Returns the combined mapping wrapped with cmap().  "class" is set to
+    "ResourceRequirement" only when at least one parameter was combined;
+    otherwise the returned mapping is empty.
+
+    Raises WorkflowException, with one message line per offending value,
+    if any of these parameters holds something other than an integer.
+    """
+
+    # Imported here so this function does not depend on the module's
+    # import list.
+    import numbers
+
+    collected = {}
+    exception_msgs = []
+    for a in max_res_pars + sum_res_pars:
+        collected[a] = []
+        for res_req in res_reqs:
+            if a not in res_req:
+                continue
+            # numbers.Integral rather than int: accepts everything int
+            # did, and also a Python 2 long.
+            if isinstance(res_req[a], numbers.Integral):
+                collected[a].append(res_req[a])
+            else:
+                exception_msgs.append(SourceLine(res_req, a).makeError(
+                    "Non-top-level ResourceRequirement in single container cannot have expressions"))
+
+    # Report every bad value at once instead of stopping at the first.
+    if exception_msgs:
+        raise WorkflowException("\n".join(exception_msgs))
+
+    overall_res_req = {}
+    for a in max_res_pars:
+        if collected[a]:
+            overall_res_req[a] = max(collected[a])
+    for a in sum_res_pars:
+        if collected[a]:
+            overall_res_req[a] = sum(collected[a])
+    if overall_res_req:
+        overall_res_req["class"] = "ResourceRequirement"
+    return cmap(overall_res_req)
+
 class ArvadosWorkflow(Workflow):
     """Wrap cwltool Workflow to override selected methods."""
 
@@ -79,6 +114,9 @@ class ArvadosWorkflow(Workflow):
         self.arvrunner = arvrunner
         self.work_api = kwargs["work_api"]
         self.wf_pdh = None
+        self.dynamic_resource_req = []
+        self.static_resource_req = []
+        self.wf_reffiles = []
 
     def job(self, joborder, output_callback, **kwargs):
         kwargs["work_api"] = self.work_api
@@ -105,6 +143,38 @@ class ArvadosWorkflow(Workflow):
 
                     packed = pack(document_loader, workflowobj, uri, self.metadata)
 
+                    builder = Builder()
+                    builder.job = joborder
+                    builder.requirements = workflowobj["requirements"]
+                    builder.hints = workflowobj["hints"]
+                    builder.resources = {}
+
+                    def visit(item):
+                        # Sort each ResourceRequirement found in this item's
+                        # hints/requirements into self.dynamic_resource_req
+                        # (some resource parameter holds a string) or
+                        # self.static_resource_req (none does).
+                        res_keys = max_res_pars + sum_res_pars
+                        for section in ("hints", "requirements"):
+                            if section not in item:
+                                continue
+                            for req in item[section]:
+                                if req["class"] != "ResourceRequirement":
+                                    continue
+                                # First resource parameter whose value is a string, if any.
+                                expr_key = next((k for k in res_keys
+                                                 if k in req and isinstance(req[k], basestring)),
+                                                None)
+                                if expr_key is None:
+                                    self.static_resource_req.append(req)
+                                elif item["id"] == "#main":
+                                    # only the top-level requirements/hints may contain expressions
+                                    self.dynamic_resource_req.append(req)
+                                else:
+                                    with SourceLine(req, expr_key, WorkflowException):
+                                        raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
+
+                    visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
+
+                    if self.static_resource_req:
+                        self.static_resource_req = [get_overall_res_req(self.static_resource_req)]
+
                     upload_dependencies(self.arvrunner,
                                         kwargs.get("name", ""),
                                         document_loader,
@@ -112,18 +182,47 @@ class ArvadosWorkflow(Workflow):
                                         uri,
                                         False)
 
+                    # Discover files/directories referenced by the
+                    # workflow (mainly "default" values)
+                    visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)
+
+
+            if self.dynamic_resource_req:
+                builder = Builder()
+                builder.job = joborder
+                builder.requirements = self.requirements
+                builder.hints = self.hints
+                builder.resources = {}
+
+                # Evaluate dynamic resource requirements using current builder
+                rs = copy.copy(self.static_resource_req)
+                for dyn_rs in self.dynamic_resource_req:
+                    eval_req = {"class": "ResourceRequirement"}
+                    for a in max_res_pars + sum_res_pars:
+                        if a in dyn_rs:
+                            eval_req[a] = builder.do_eval(dyn_rs[a])
+                    rs.append(eval_req)
+                job_res_reqs = [get_overall_res_req(rs)]
+            else:
+                job_res_reqs = self.static_resource_req
+
             with Perf(metrics, "subworkflow adjust"):
                 joborder_resolved = copy.deepcopy(joborder)
                 joborder_keepmount = copy.deepcopy(joborder)
 
                 reffiles = []
-                visit_class(joborder_keepmount, ("File", "Directory"), lambda x: reffiles.append(x))
+                visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)
 
-                mapper = ArvPathMapper(self.arvrunner, reffiles, kwargs["basedir"],
+                mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, kwargs["basedir"],
                                  "/keep/%s",
                                  "/keep/%s/%s",
                                  **kwargs)
 
+                # For containers API, we need to make sure any extra
+                # referenced files (ie referenced by the workflow but
+                # not in the inputs) are included in the mounts.
+                kwargs["extra_reffiles"] = copy.deepcopy(self.wf_reffiles)
+
                 def keepmount(obj):
                     remove_redundant_fields(obj)
                     with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
@@ -158,7 +257,7 @@ class ArvadosWorkflow(Workflow):
                 "inputs": self.tool["inputs"],
                 "outputs": self.tool["outputs"],
                 "stdout": "cwl.output.json",
-                "requirements": self.requirements+[
+                "requirements": self.requirements+job_res_reqs+[
                     {
                     "class": "InitialWorkDirRequirement",
                     "listing": [{
@@ -173,7 +272,8 @@ class ArvadosWorkflow(Workflow):
                         }]
                 }],
                 "hints": self.hints,
-                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"]
+                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
+                "id": "#"
             })
             kwargs["loader"] = self.doc_loader
             kwargs["avsc_names"] = self.doc_schema