cwlVersion: v1.1
class: Workflow
+label: WGS processing workflow scattered over samples
requirements:
- class: SubworkflowFeatureRequirement
+ - class: ScatterFeatureRequirement # required by the scatter on the bwamem-gatk-report step
inputs:
- fastq1: File
- fastq2: File
+ fastqdir: # consumed only by the getfastq step
+ type: Directory
+ label: Directory of paired FASTQ files
reference:
type: File
+ format: edam:format_1929 # FASTA
+ label: Reference genome
secondaryFiles:
- .amb
- .ann
- .sa
- .fai
- ^.dict
- sample: string
+ fullintervallist: # forwarded to bwamem-gatk-report; not in its scatter list, so every job gets the same file
+ type: File
knownsites:
type: File
+ format: edam:format_3016 # VCF
+ label: VCF of known polymorphic sites for BQSR
secondaryFiles:
- .tbi
- scattercount: string
- clinvarvcf: File
- reportfunc: File
- headhtml: File
- tailhtml: File
+ scattercount: # typed as string (not int) and forwarded as-is to bwamem-gatk-report
+ type: string
+ label: Desired split for variant calling
+ clinvarvcf:
+ type: File
+ format: edam:format_3016 # VCF
+ label: Reference VCF for ClinVar
+ reportfunc:
+ type: File
+ label: Function used to create HTML report
+ headhtml:
+ type: File
+ format: edam:format_2331 # HTML
+ label: Header for HTML report
+ tailhtml:
+ type: File
+ format: edam:format_2331 # HTML
+ label: Footer for HTML report
outputs:
- qc-html:
- type: File[]
- outputSource: fastqc/out-html
- qc-zip:
- type: File[]
- outputSource: fastqc/out-zip
gvcf:
- type: File
- outputSource: haplotypecaller/gatheredgvcf
+ type: File[] # array because bwamem-gatk-report is scattered: one file per job
+ outputSource: bwamem-gatk-report/gvcf
+ format: edam:format_3016 # GVCF
+ label: GVCFs generated from GATK
report:
- type: File
- outputSource: generate-report/report
+ type: File[] # array because bwamem-gatk-report is scattered: one file per job
+ outputSource: bwamem-gatk-report/report
+ format: edam:format_2331 # HTML
+ label: ClinVar variant reports
+
steps:
- fastqc:
- run: fastqc.cwl
+ getfastq:
+ run: ./helper/getfastq.cwl
in:
- fastq1: fastq1
- fastq2: fastq2
- out: [out-html, out-zip]
- bwamem-samtools-view:
- run: bwamem-samtools-view.cwl
- in:
- fastq1: fastq1
- fastq2: fastq2
- reference: reference
- sample: sample
- out: [bam]
- samtools-sort:
- run: samtools-sort.cwl
- in:
- bam: bwamem-samtools-view/bam
- sample: sample
- out: [sortedbam]
- mark-duplicates:
- run: mark-duplicates.cwl
- in:
- bam: samtools-sort/sortedbam
- out: [dupbam,dupmetrics]
- samtools-index:
- run: samtools-index.cwl
- in:
- bam: mark-duplicates/dupbam
- out: [indexedbam]
- haplotypecaller:
- run: scatter-gatk-wf-with-interval.cwl
+ fastqdir: fastqdir
+ out: [fastq1, fastq2, sample] # all three are scattered over below, so presumably equal-length arrays — TODO confirm in getfastq.cwl
+
+ bwamem-gatk-report:
+ run: ./helper/bwamem-gatk-report-wf.cwl
+ scatter: [fastq1, fastq2, sample]
+ scatterMethod: dotproduct # the i-th fastq1, fastq2 and sample are combined into one job
in:
+ fastq1: getfastq/fastq1
+ fastq2: getfastq/fastq2
reference: reference
- bam: samtools-index/indexedbam
- sample: sample
+ fullintervallist: fullintervallist
+ sample: getfastq/sample
+ knownsites: knownsites
scattercount: scattercount
- knownsites1: knownsites
- out: [gatheredgvcf]
- generate-report:
- run: report-wf.cwl
- in:
- gvcf: haplotypecaller/gatheredgvcf
- samplename: sample
clinvarvcf: clinvarvcf
reportfunc: reportfunc
headhtml: headhtml
tailhtml: tailhtml
- out: [report]
+ out: [qc-html,qc-zip,gvcf,report] # NOTE(review): qc-html and qc-zip are not wired to any workflow output in the added outputs section
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces: # prefixes behind the s: metadata keys and the edam: format identifiers used in this file
+ s: https://schema.org/
+ edam: http://edamontology.org/
+
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl