Updating cwl and yml for intervals and other changes
[arvados-tutorial.git] / WGS-processing / cwl / wgs-processing-wf.cwl
index 0a4b9d274d40a8b9ffe854b8d79f924671ac6953..8cbcf5ee03b17f7d5800b611fd29a4d69cfcc105 100644 (file)
@@ -1,14 +1,19 @@
 cwlVersion: v1.1
 class: Workflow
+label: WGS processing workflow scattered over samples
 
 requirements:
   - class: SubworkflowFeatureRequirement
+  - class: ScatterFeatureRequirement
 
 inputs:
-  fastq1: File
-  fastq2: File
+  fastqdir:
+    type: Directory 
+    label: Directory of paired FASTQ files
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome 
     secondaryFiles:
       - .amb
       - .ann
@@ -17,77 +22,77 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-  sample: string
+  fullintervallist:
+    type: File
   knownsites:
     type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
     secondaryFiles:
       - .tbi   
-  scattercount: string
-  clinvarvcf: File
-  reportfunc: File
-  headhtml: File
-  tailhtml: File
+  scattercount: 
+    type: string
+    label: Desired split for variant calling
+  clinvarvcf: 
+    type: File
+    format: edam:format_3016 # VCF
+    label: Reference VCF for ClinVar
+  reportfunc: 
+    type: File
+    label: Function used to create HTML report
+  headhtml: 
+    type: File
+    format: edam:format_2331 # HTML 
+    label: Header for HTML report
+  tailhtml: 
+    type: File
+    format: edam:format_2331 # HTML
+    label: Footer for HTML report
 
 outputs:
-  qc-html:
-    type: File[]
-    outputSource: fastqc/out-html
-  qc-zip:
-    type: File[]
-    outputSource: fastqc/out-zip 
   gvcf:
-    type: File
-    outputSource: haplotypecaller/gatheredgvcf
+    type: File[]
+    outputSource: bwamem-gatk-report/gvcf
+    format: edam:format_3016 # VCF (GVCF)
+    label: GVCFs generated from GATK 
   report:
-    type: File  
-    outputSource: generate-report/report
+    type: File[]  
+    outputSource: bwamem-gatk-report/report
+    format: edam:format_2331 # HTML
+    label: ClinVar variant reports 
+
 steps:
-  fastqc:
-    run: fastqc.cwl
+  getfastq:
+    run: ./helper/getfastq.cwl
     in:
-      fastq1: fastq1
-      fastq2: fastq2
-    out: [out-html, out-zip]
-  bwamem-samtools-view:
-    run: bwamem-samtools-view.cwl
-    in:
-      fastq1: fastq1
-      fastq2: fastq2
-      reference: reference
-      sample: sample
-    out: [bam]
-  samtools-sort:
-    run: samtools-sort.cwl 
-    in:
-      bam: bwamem-samtools-view/bam
-      sample: sample
-    out: [sortedbam]
-  mark-duplicates:
-    run: mark-duplicates.cwl
-    in:
-      bam: samtools-sort/sortedbam
-    out: [dupbam,dupmetrics]
-  samtools-index:
-    run: samtools-index.cwl
-    in:
-      bam: mark-duplicates/dupbam
-    out: [indexedbam]
-  haplotypecaller:
-    run: scatter-gatk-wf-with-interval.cwl 
+      fastqdir: fastqdir
+    out: [fastq1, fastq2, sample]
+
+  bwamem-gatk-report:  # per-sample step; replaces the inline fastqc/bwamem/.../generate-report steps removed in this change
+    run: ./helper/bwamem-gatk-report-wf.cwl
+    scatter: [fastq1, fastq2, sample]  # all three are fed from getfastq outputs below
+    scatterMethod: dotproduct  # CWL dotproduct: i-th fastq1, fastq2 and sample are combined into one job
     in:
+      fastq1: getfastq/fastq1
+      fastq2: getfastq/fastq2
       reference: reference
-      bam: samtools-index/indexedbam
-      sample: sample
+      fullintervallist: fullintervallist
+      sample: getfastq/sample
+      knownsites: knownsites
       scattercount: scattercount
-      knownsites1: knownsites
-    out: [gatheredgvcf]
-  generate-report:
-    run: report-wf.cwl
-    in:
-      gvcf: haplotypecaller/gatheredgvcf
-      samplename: sample
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
       tailhtml: tailhtml
-    out: [report]  
+    out: [qc-html,qc-zip,gvcf,report]  # NOTE(review): only gvcf and report are referenced by the visible workflow outputs
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl