Update to allow two VCFs of known sites
[arvados-tutorial.git] / WGS-processing / cwl / helper / scatter-gatk-wf-with-interval.cwl
index d740222db6e399a993ffb81d1767d80ba001426f..13bd4af04c3528e384078e073c4e722823e498ec 100644 (file)
@@ -1,10 +1,10 @@
 cwlVersion: v1.1
 class: Workflow
-label: Variant calling workflow scattered over interval splits 
+label: Scattered variant calling workflow
 
 requirements:
-  - class: SubworkflowFeatureRequirement
-  - class: ScatterFeatureRequirement
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
 
 inputs:
   bam:
@@ -25,16 +25,25 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-  sample: 
+  fullintervallist:
+    type: File
+    label: Full list of intervals to operate over
+  sample:
     type: string
     label: Sample Name
   knownsites1:
+    type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
+    secondaryFiles:
+      - .idx
+  knownsites2:
     type: File
     format: edam:format_3016 # VCF
     label: VCF of known polymorphic sites for BQSR
     secondaryFiles:
       - .tbi
-  scattercount: 
+  scattercount:
     type: string
     label: Desired split for variant calling
 
@@ -42,21 +51,22 @@ outputs:
   gatheredgvcf:
     type: File
     format: edam:format_3016 # GVCF
-    label: GVCF generated from GATK Haplotype Caller
-    secondaryFiles: 
+    label: Gathered GVCF
+    secondaryFiles:
       - .tbi
-    outputSource: merge-GVCFs/gatheredgvcf
+    outputSource: gather-GVCFs/gatheredgvcf
     
 steps:
   splitintervals:
     run: gatk-splitintervals.cwl
     in:
       reference: reference
+      fullintervallist: fullintervallist
       sample: sample
       scattercount: scattercount
     out: [intervalfiles]
       
-  recal-haplotypecaller: 
+  recal-haplotypecaller:
     run: gatk-wf-with-interval.cwl
     scatter: intervallist
     in:
@@ -64,16 +74,17 @@ steps:
       reference: reference
       sample: sample
       knownsites1: knownsites1
+      knownsites2: knownsites2
       intervallist: splitintervals/intervalfiles
     out: [gvcf]
 
-  merge-GVCFs:
+  gather-GVCFs:
     run: gather-array-vcf.cwl
     in:
       gvcfarray: recal-haplotypecaller/gvcf
       sample: sample
       reference: reference
-    out: [gatheredgvcf] 
+    out: [gatheredgvcf]
 
 s:codeRepository: https://github.com/arvados/arvados-tutorial
 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
@@ -82,6 +93,6 @@ $namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/
 
-$schemas:
- - https://schema.org/version/latest/schema.rdf
- - http://edamontology.org/EDAM_1.18.owl
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl