Merge branch '16265-security-updates' into dependabot/bundler/apps/workbench/rake...
[arvados.git] / sdk / cwl / arvados_cwl / arvworkflow.py
index f514476b9099895d3739dbb34ab0a0c372bd7e73..ddd3c00764c7b0fb42fe97adf50622bd1b23e9cb 100644 (file)
@@ -2,15 +2,19 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from past.builtins import basestring
+from future.utils import viewitems
+
 import os
 import json
 import copy
 import logging
 
 from schema_salad.sourceline import SourceLine, cmap
+import schema_salad.ref_resolver
 
 from cwltool.pack import pack
-from cwltool.load_tool import fetch_document
+from cwltool.load_tool import fetch_document, resolve_and_validate_document
 from cwltool.process import shortname
 from cwltool.workflow import Workflow, WorkflowException, WorkflowStep
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
@@ -19,9 +23,11 @@ from cwltool.context import LoadingContext
 import ruamel.yaml as yaml
 
 from .runner import (upload_dependencies, packed_workflow, upload_workflow_collection,
-                     trim_anonymous_location, remove_redundant_fields, discover_secondary_files)
+                     trim_anonymous_location, remove_redundant_fields, discover_secondary_files,
+                     make_builder)
 from .pathmapper import ArvPathMapper, trim_listing
-from .arvtool import ArvadosCommandTool, set_cluster_target, make_builder
+from .arvtool import ArvadosCommandTool, set_cluster_target
+
 from .perf import Perf
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -135,7 +141,7 @@ class ArvadosWorkflowStep(WorkflowStep):
         runtimeContext = runtimeContext.copy()
         runtimeContext.toplevel = True  # Preserve behavior for #13365
 
-        builder = make_builder({shortname(k): v for k,v in joborder.items()}, self.hints, self.requirements, runtimeContext)
+        builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements, runtimeContext)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
         return super(ArvadosWorkflowStep, self).job(joborder, output_callback, runtimeContext)
 
@@ -155,7 +161,7 @@ class ArvadosWorkflow(Workflow):
 
     def job(self, joborder, output_callback, runtimeContext):
 
-        builder = self._init_job(joborder, runtimeContext)
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
 
         req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
@@ -167,25 +173,29 @@ class ArvadosWorkflow(Workflow):
         with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
             if "id" not in self.tool:
                 raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
-        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
 
-        discover_secondary_files(self.tool["inputs"], joborder)
+        discover_secondary_files(self.arvrunner.fs_access, builder,
+                                 self.tool["inputs"], joborder)
 
         with Perf(metrics, "subworkflow upload_deps"):
             upload_dependencies(self.arvrunner,
                                 os.path.basename(joborder.get("id", "#")),
-                                document_loader,
+                                self.doc_loader,
                                 joborder,
                                 joborder.get("id", "#"),
                                 False)
 
             if self.wf_pdh is None:
-                workflowobj["requirements"] = dedup_reqs(self.requirements)
-                workflowobj["hints"] = dedup_reqs(self.hints)
+                packed = pack(self.loadingContext, self.tool["id"], loader=self.doc_loader)
 
-                packed = pack(document_loader, workflowobj, uri, self.metadata)
+                for p in packed["$graph"]:
+                    if p["id"] == "#main":
+                        p["requirements"] = dedup_reqs(self.requirements)
+                        p["hints"] = dedup_reqs(self.hints)
 
                 def visit(item):
+                    if "requirements" in item:
+                        item["requirements"] = [i for i in item["requirements"] if i["class"] != "DockerRequirement"]
                     for t in ("hints", "requirements"):
                         if t not in item:
                             continue
@@ -213,9 +223,9 @@ class ArvadosWorkflow(Workflow):
 
                 upload_dependencies(self.arvrunner,
                                     runtimeContext.name,
-                                    document_loader,
+                                    self.doc_loader,
                                     packed,
-                                    uri,
+                                    self.tool["id"],
                                     False)
 
                 # Discover files/directories referenced by the
@@ -282,6 +292,20 @@ class ArvadosWorkflow(Workflow):
                 adjustDirObjs(packed, keepmount)
                 self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)
 
+        self.loadingContext = self.loadingContext.copy()
+        self.loadingContext.metadata = self.loadingContext.metadata.copy()
+        self.loadingContext.metadata["http://commonwl.org/cwltool#original_cwlVersion"] = "v1.0"
+
+        if len(job_res_reqs) == 1:
+            # RAM request needs to be at least 128 MiB or the workflow
+            # runner itself won't run reliably.
+            if job_res_reqs[0].get("ramMin", 1024) < 128:
+                job_res_reqs[0]["ramMin"] = 128
+
+        arguments = ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl", "cwl.input.yml"]
+        if runtimeContext.debug:
+            arguments.insert(0, '--debug')
+
         wf_runner = cmap({
             "class": "CommandLineTool",
             "baseCommand": "cwltool",
@@ -301,7 +325,7 @@ class ArvadosWorkflow(Workflow):
                     }]
             }],
             "hints": self.hints,
-            "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
+            "arguments": arguments,
             "id": "#"
         })
         return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)