Updating to allow for two VCFs of known sites
[arvados-tutorial.git] / WGS-processing / cwl / helper / scatter-gatk-wf-with-interval.cwl
index 6e9f415f691c54fc41b5d497f28d9abb30f74855..13bd4af04c3528e384078e073c4e722823e498ec 100644 (file)
@@ -1,20 +1,22 @@
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
 cwlVersion: v1.1
 class: Workflow
+label: Scattered variant calling workflow
 
 requirements:
-  - class: SubworkflowFeatureRequirement
-  - class: ScatterFeatureRequirement
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
 
 inputs:
   bam:
     type: File
+    format: edam:format_2572 # BAM
+    label: Indexed sorted BAM with labeled duplicates
     secondaryFiles:
       - .bai
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
@@ -23,30 +25,48 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-  sample: string
+  fullintervallist:
+    type: File
+    label: Full list of intervals to operate over
+  sample:
+    type: string
+    label: Sample Name
   knownsites1:
     type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
+    secondaryFiles:
+      - .idx
+  knownsites2:
+    type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
     secondaryFiles:
       - .tbi
-  scattercount: string
+  scattercount:
+    type: string
+    label: Desired split for variant calling
 
 outputs:
   gatheredgvcf:
     type: File
-    secondaryFiles: 
+    format: edam:format_3016 # GVCF
+    label: Gathered GVCF
+    secondaryFiles:
       - .tbi
-    outputSource: merge-GVCFs/gatheredgvcf
+    outputSource: gather-GVCFs/gatheredgvcf
     
 steps:
   splitintervals:
     run: gatk-splitintervals.cwl
     in:
       reference: reference
+      fullintervallist: fullintervallist
       sample: sample
       scattercount: scattercount
     out: [intervalfiles]
       
-  recal-haplotypecaller: 
+  recal-haplotypecaller:
     run: gatk-wf-with-interval.cwl
     scatter: intervallist
     in:
@@ -54,13 +74,25 @@ steps:
       reference: reference
       sample: sample
       knownsites1: knownsites1
+      knownsites2: knownsites2
       intervallist: splitintervals/intervalfiles
     out: [gvcf]
 
-  merge-GVCFs:
+  gather-GVCFs:
     run: gather-array-vcf.cwl
     in:
       gvcfarray: recal-haplotypecaller/gvcf
       sample: sample
       reference: reference
-    out: [gatheredgvcf] 
+    out: [gatheredgvcf]
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl