Merge branch '12913-secondary-mounts' refs #12913
author Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 9 Jan 2018 14:50:17 +0000 (09:50 -0500)
committer Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 9 Jan 2018 14:50:21 +0000 (09:50 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/secondary/dir/hg19.fa [new file with mode: 0644]
sdk/cwl/tests/secondary/dir/hg19.fa.amb [new file with mode: 0644]
sdk/cwl/tests/secondary/dir/hg19.fa.ann [new file with mode: 0644]
sdk/cwl/tests/secondary/dir/hg19.fa.fai [new file with mode: 0644]
sdk/cwl/tests/secondary/ls.cwl [new file with mode: 0644]
sdk/cwl/tests/secondary/sub.cwl [new file with mode: 0644]
sdk/cwl/tests/secondary/wf-job.yml [new file with mode: 0644]
sdk/cwl/tests/secondary/wf.cwl [new file with mode: 0644]

index 316af0e52847d2caad516ef1b6d126577b948e92..d426c1ad6f32fef90e0a5b15a3691fe811c78760 100644 (file)
@@ -17,7 +17,8 @@ from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
 
 import ruamel.yaml as yaml
 
-from .runner import upload_dependencies, packed_workflow, upload_workflow_collection, trim_anonymous_location, remove_redundant_fields
+from .runner import (upload_dependencies, packed_workflow, upload_workflow_collection,
+                     trim_anonymous_location, remove_redundant_fields, discover_secondary_files)
 from .pathmapper import ArvPathMapper, trim_listing
 from .arvtool import ArvadosCommandTool
 from .perf import Perf
@@ -88,6 +89,8 @@ class ArvadosWorkflow(Workflow):
                     raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
             document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
 
+            discover_secondary_files(self.tool["inputs"], joborder)
+
             with Perf(metrics, "subworkflow upload_deps"):
                 upload_dependencies(self.arvrunner,
                                     os.path.basename(joborder.get("id", "#")),
index c55e976924f65bd362153a0921152e5a170ca47b..28de7f368a23ccbc52a0cae37fa55d358744d0e1 100644 (file)
@@ -191,12 +191,8 @@ def tag_git_version(packed):
             packed["http://schema.org/version"] = githash
 
 
-def upload_job_order(arvrunner, name, tool, job_order):
-    """Upload local files referenced in the input object and return updated input
-    object with 'location' updated to the proper keep references.
-    """
-
-    for t in tool.tool["inputs"]:
+def discover_secondary_files(inputs, job_order):
+    for t in inputs:
         def setSecondary(fileobj):
             if isinstance(fileobj, dict) and fileobj.get("class") == "File":
                 if "secondaryFiles" not in fileobj:
@@ -209,6 +205,13 @@ def upload_job_order(arvrunner, name, tool, job_order):
         if shortname(t["id"]) in job_order and t.get("secondaryFiles"):
             setSecondary(job_order[shortname(t["id"])])
 
+def upload_job_order(arvrunner, name, tool, job_order):
+    """Upload local files referenced in the input object and return updated input
+    object with 'location' updated to the proper keep references.
+    """
+
+    discover_secondary_files(tool.tool["inputs"], job_order)
+
     jobmapper = upload_dependencies(arvrunner,
                                     name,
                                     tool.doc_loader,
index f6271b85d2f9acab6f6371e3def545b1473cd8d7..cfb2fa4a9a8b88c0bee1e5e62ca5cdd9275daa69 100644 (file)
     out: out
   tool: wf/runin-wf.cwl
   doc: "RunInSingleContainer cwl.input.json needs to be consistent with pathmapper manipulations"
+
+- job: secondary/wf-job.yml
+  output: {}
+  tool: secondary/wf.cwl
+  doc: "RunInSingleContainer applies secondaryFile discovery & manipulation before generating cwl.input.yml"
diff --git a/sdk/cwl/tests/secondary/dir/hg19.fa b/sdk/cwl/tests/secondary/dir/hg19.fa
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/cwl/tests/secondary/dir/hg19.fa.amb b/sdk/cwl/tests/secondary/dir/hg19.fa.amb
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/cwl/tests/secondary/dir/hg19.fa.ann b/sdk/cwl/tests/secondary/dir/hg19.fa.ann
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/cwl/tests/secondary/dir/hg19.fa.fai b/sdk/cwl/tests/secondary/dir/hg19.fa.fai
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/cwl/tests/secondary/ls.cwl b/sdk/cwl/tests/secondary/ls.cwl
new file mode 100644 (file)
index 0000000..b37990a
--- /dev/null
@@ -0,0 +1,11 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+  i:
+    type: File
+    inputBinding:
+      position: 1
+    secondaryFiles:
+      - .fai
+outputs: []
+arguments: [ls, $(inputs.i), $(inputs.i.path).fai]
diff --git a/sdk/cwl/tests/secondary/sub.cwl b/sdk/cwl/tests/secondary/sub.cwl
new file mode 100644 (file)
index 0000000..5d2c699
--- /dev/null
@@ -0,0 +1,17 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+inputs:
+  i:
+    type: File
+    secondaryFiles:
+      - .fai
+outputs: []
+steps:
+  step1:
+    in:
+      i: i
+    out: []
+    run: ls.cwl
diff --git a/sdk/cwl/tests/secondary/wf-job.yml b/sdk/cwl/tests/secondary/wf-job.yml
new file mode 100644 (file)
index 0000000..8b9dd83
--- /dev/null
@@ -0,0 +1,3 @@
+i:
+  class: File
+  location: keep:f225e6259bdd63bc7240599648dde9f1+97/hg19.fa
diff --git a/sdk/cwl/tests/secondary/wf.cwl b/sdk/cwl/tests/secondary/wf.cwl
new file mode 100644 (file)
index 0000000..248aefd
--- /dev/null
@@ -0,0 +1,23 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+requirements:
+  SubworkflowFeatureRequirement: {}
+inputs:
+  i:
+    type: File
+    # secondaryFiles:
+    #   - .fai
+    #   - .ann
+    #   - .amb
+outputs: []
+steps:
+  step1:
+    in:
+      i: i
+    out: []
+    run: sub.cwl
+    requirements:
+      arv:RunInSingleContainer: {}