10812: Use packed workflows for all run modes.
[arvados.git] / sdk / cwl / arvados_cwl / __init__.py
index 3ffc4c7254a0b8a850716e82aebe6bbd1afb3c26..02eca644d30eee769d38cfe083ae513f4b5f49e8 100644 (file)
@@ -28,7 +28,7 @@ from arvados.errors import ApiError
 
 from .arvcontainer import ArvadosContainer, RunnerContainer
 from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
-from. runner import Runner, upload_instance
+from. runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
 from .arvtool import ArvadosCommandTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
 from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver
@@ -177,7 +177,7 @@ class ArvCwlRunner(object):
 
                 for p in proc_states["items"]:
                     self.on_message({
-                        "object_uuid": p["uuid"],
+                       "object_uuid": p["uuid"],
                         "event_type": "update",
                         "properties": {
                             "new_attributes": p
@@ -334,23 +334,44 @@ class ArvCwlRunner(object):
                                                                  keep_client=self.keep_client)
         self.fs_access = make_fs_access(kwargs["basedir"])
 
+        if not kwargs.get("name"):
+            kwargs["name"] = self.name = tool.tool.get("label") or tool.metadata.get("label") or os.path.basename(tool.tool["id"])
+
+        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
+        # Also uploads docker images.
+        upload_workflow_deps(self, tool)
+
+        # Reload tool object which may have been updated by
+        # upload_workflow_deps
+        tool = self.arv_make_tool(tool.doc_loader.idx[tool.tool["id"]],
+                                  makeTool=self.arv_make_tool,
+                                  loader=tool.doc_loader,
+                                  avsc_names=tool.doc_schema,
+                                  metadata=tool.metadata)
+
+        # Upload local file references in the job order.
+        job_order = upload_job_order(self, "%s input" % kwargs["name"],
+                                     tool, job_order)
+
         existing_uuid = kwargs.get("update_workflow")
         if existing_uuid or kwargs.get("create_workflow"):
+            # Create a pipeline template or workflow record and exit.
             if self.work_api == "jobs":
                 tmpl = RunnerTemplate(self, tool, job_order,
                                       kwargs.get("enable_reuse"),
                                       uuid=existing_uuid,
                                       submit_runner_ram=kwargs.get("submit_runner_ram"),
-                                      name=kwargs.get("name"))
+                                      name=kwargs["name"])
                 tmpl.save()
                 # cwltool.main will write our return value to stdout.
                 return (tmpl.uuid, "success")
-            else:
+            elif self.work_api == "containers":
                 return (upload_workflow(self, tool, job_order,
-                                       self.project_uuid,
-                                       uuid=existing_uuid,
-                                       submit_runner_ram=kwargs.get("submit_runner_ram"),
-                                        name=kwargs.get("name")), "success")
+                                        self.project_uuid,
+                                        uuid=existing_uuid,
+                                        submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                        name=kwargs["name"]),
+                        "success")
 
         self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")
 
@@ -360,9 +381,6 @@ class ArvCwlRunner(object):
         kwargs["tmpdir_prefix"] = "tmp"
         kwargs["compute_checksum"] = kwargs.get("compute_checksum")
 
-        if not kwargs["name"]:
-            del kwargs["name"]
-
         if self.work_api == "containers":
             kwargs["outdir"] = "/var/spool/cwl"
             kwargs["docker_outdir"] = "/var/spool/cwl"
@@ -373,26 +391,39 @@ class ArvCwlRunner(object):
             kwargs["docker_outdir"] = "$(task.outdir)"
             kwargs["tmpdir"] = "$(task.tmpdir)"
 
-        upload_instance(self, shortname(tool.tool["id"]), tool, job_order)
-
         runnerjob = None
         if kwargs.get("submit"):
+            # Submit a runner job to run the workflow for us.
             if self.work_api == "containers":
                 if tool.tool["class"] == "CommandLineTool":
                     kwargs["runnerjob"] = tool.tool["id"]
+                    upload_dependencies(self,
+                                        kwargs["name"],
+                                        tool.doc_loader,
+                                        tool.tool,
+                                        tool.tool["id"],
+                                        False)
                     runnerjob = tool.job(job_order,
                                          self.output_callback,
                                          **kwargs).next()
                 else:
-                    runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
-                                                self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
-                                                name=kwargs.get("name"), on_error=kwargs.get("on_error"))
-            else:
-                runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
-                                      self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
-                                      name=kwargs.get("name"), on_error=kwargs.get("on_error"))
+                    runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"),
+                                                self.output_name,
+                                                self.output_tags,
+                                                submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                                name=kwargs.get("name"),
+                                                on_error=kwargs.get("on_error"),
+                                                submit_runner_image=kwargs.get("submit_runner_image"))
+            elif self.work_api == "jobs":
+                runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"),
+                                      self.output_name,
+                                      self.output_tags,
+                                      submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                      name=kwargs.get("name"),
+                                      on_error=kwargs.get("on_error"),
+                                      submit_runner_image=kwargs.get("submit_runner_image"))
 
-        if not kwargs.get("submit") and "cwl_runner_job" not in kwargs and not self.work_api == "containers":
+        if not kwargs.get("submit") and "cwl_runner_job" not in kwargs and self.work_api == "jobs":
             # Create pipeline for local run
             self.pipeline = self.api.pipeline_instances().create(
                 body={
@@ -573,6 +604,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="RAM (in MiB) required for the workflow runner job (default 1024)",
                         default=1024)
 
+    parser.add_argument("--submit-runner-image", type=str,
+                        help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
+                        default=None)
+
     parser.add_argument("--name", type=str,
                         help="Name to use for workflow execution instance.",
                         default=None)