Update the workflow to accept two VCFs of known sites
[arvados-tutorial.git] / WGS-processing / cwl / helper / bwamem-gatk-report-wf.cwl
index 0a4b9d274d40a8b9ffe854b8d79f924671ac6953..76e4798bf270315a57d10a2363aa203487feedc1 100644 (file)
@@ -1,14 +1,23 @@
 cwlVersion: v1.1
 class: Workflow
+label: WGS processing workflow for single sample
 
 requirements:
-  - class: SubworkflowFeatureRequirement
+  SubworkflowFeatureRequirement: {}
 
 inputs:
-  fastq1: File
-  fastq2: File
+  fastq1:
+    type: File
+    format: edam:format_1930 # FASTQ
+    label: One of a set of paired-end FASTQs (R1)
+  fastq2:
+    type: File
+    format: edam:format_1930 # FASTQ
+    label: One of a set of paired-end FASTQs (R2)
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
@@ -17,30 +26,64 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-  sample: string
-  knownsites:
+  fullintervallist:
     type: File
+    label: Full list of intervals to operate over
+  sample:
+    type: string
+    label: Sample Name
+  knownsites1:
+    type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR (1 of 2)
     secondaryFiles:
-      - .tbi   
-  scattercount: string
-  clinvarvcf: File
-  reportfunc: File
-  headhtml: File
-  tailhtml: File
+      - .idx # index file expected next to the VCF as <vcf>.idx
+  knownsites2:
+    type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR (2 of 2)
+    secondaryFiles:
+      - .tbi # index file expected next to the VCF as <vcf>.tbi
+  scattercount:
+    type: string # NOTE(review): a count typed as string; confirm the callee expects a string
+    label: Desired split for variant calling
+  clinvarvcf:
+    type: File
+    format: edam:format_3016 # VCF
+    label: Reference VCF for ClinVar
+  reportfunc:
+    type: File
+    label: Function used to create HTML report
+  headhtml:
+    type: File
+    format: edam:format_2331 # HTML
+    label: Header for HTML report
+  tailhtml:
+    type: File
+    format: edam:format_2331 # HTML
+    label: Footer for HTML report
 
 outputs:
   qc-html:
     type: File[]
+    label: FASTQ QC reports
+    format: edam:format_2331 # HTML
     outputSource: fastqc/out-html
   qc-zip:
     type: File[]
+    label: Zip files of FASTQ QC report and associated data
     outputSource: fastqc/out-zip 
   gvcf:
     type: File
     outputSource: haplotypecaller/gatheredgvcf
+    format: edam:format_3016 # VCF (EDAM has no separate GVCF term; a GVCF is VCF-formatted)
+    label: GVCF generated from GATK Haplotype Caller
   report:
     type: File  
     outputSource: generate-report/report
+    format: edam:format_2331 # HTML
+    label: ClinVar variant report
+
 steps:
   fastqc:
     run: fastqc.cwl
@@ -57,7 +100,7 @@ steps:
       sample: sample
     out: [bam]
   samtools-sort:
-    run: samtools-sort.cwl 
+    run: samtools-sort.cwl
     in:
       bam: bwamem-samtools-view/bam
       sample: sample
@@ -76,18 +119,31 @@ steps:
     run: scatter-gatk-wf-with-interval.cwl 
     in:
       reference: reference
+      fullintervallist: fullintervallist
       bam: samtools-index/indexedbam
       sample: sample
       scattercount: scattercount
-      knownsites1: knownsites
+      knownsites1: knownsites1
+      knownsites2: knownsites2
     out: [gatheredgvcf]
   generate-report:
     run: report-wf.cwl
     in:
       gvcf: haplotypecaller/gatheredgvcf
-      samplename: sample
+      sample: sample
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
       tailhtml: tailhtml
-    out: [report]  
+    out: [report]
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces: # prefixes for the s: metadata keys and the edam: format identifiers
+  s: https://schema.org/
+  edam: http://edamontology.org/
+
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl