Bugfix .bai secondary file for ApplyBQSR behavior
[arvados-tutorial.git] / WGS-processing / cwl / helper / bwamem-gatk-report-wf.cwl
1 cwlVersion: v1.1
2 class: Workflow
3 label: WGS processing workflow for single sample
4
5 requirements:
6   SubworkflowFeatureRequirement: {} # lets a step's run target be another Workflow; presumably needed for the *-wf.cwl steps below — confirm
7
8 inputs: # Workflow-level parameters; every one is wired into at least one entry under steps
9   fastq1: # read by the fastqc and bwamem-samtools-view steps
10     type: File
11     format: edam:format_1930 # FASTQ
12     label: One of set of pair-end FASTQs (R1)
13   fastq2: # read by the fastqc and bwamem-samtools-view steps
14     type: File
15     format: edam:format_1930 # FASTQ
16     label: One of set of pair-end FASTQs (R2)
17   reference: # read by the bwamem-samtools-view and haplotypecaller steps
18     type: File
19     format: edam:format_1929 # FASTA
20     label: Reference genome
21     secondaryFiles: # companion files required next to the FASTA; a plain suffix is appended to the FASTA's file name
22       - .amb
23       - .ann
24       - .bwt
25       - .pac
26       - .sa
27       - .fai
28       - ^.dict # leading ^ removes the FASTA's last extension before .dict is appended
29   fullintervallist: # read by the haplotypecaller step
30     type: File
31     label: Full list of intervals to operate over
32   sample: # read by bwamem-samtools-view, samtools-sort, haplotypecaller and generate-report
33     type: string
34     label: Sample Name
35   knownsites1: # read by the haplotypecaller step
36     type: File
37     format: edam:format_3016 # VCF
38     label: VCF of known SNPS sites for BQSR
39     secondaryFiles:
40       - .idx # companion file named <vcf name>.idx
41   knownsites2: # read by the haplotypecaller step
42     type: File
43     format: edam:format_3016 # VCF
44     label: VCF of known indel sites for BQSR
45     secondaryFiles:
46       - .tbi # companion file named <vcf name>.tbi
47   scattercount: # read by the haplotypecaller step
48     type: string # NOTE(review): typed as string although the label describes a split count — confirm what scatter-gatk-wf-with-interval.cwl expects
49     label: Desired split for variant calling
50   clinvarvcf: # read by the generate-report step
51     type: File
52     format: edam:format_3016 # VCF
53     label: Reference VCF for ClinVar
54   reportfunc: # read by the generate-report step
55     type: File
56     label: Function used to create HTML report
57   headhtml: # read by the generate-report step
58     type: File
59     format: edam:format_2331 # HTML
60     label: Header for HTML report
61   tailhtml: # read by the generate-report step
62     type: File
63     format: edam:format_2331 # HTML
64     label: Footer for HTML report
65
66 outputs: # Files exposed by the workflow; each outputSource is a <step>/<output> pair declared under steps
67   qc-html:
68     type: File[]
69     label: FASTQ QC reports
70     format: edam:format_2331 # HTML
71     outputSource: fastqc/out-html # out-html output of the fastqc step
72   qc-zip:
73     type: File[]
74     label: Zip files of FASTQ QC report and associated data
75     outputSource: fastqc/out-zip # out-zip output of the fastqc step
76   gvcf:
77     type: File
78     outputSource: haplotypecaller/gatheredgvcf # single file from the haplotypecaller step
79     format: edam:format_3016 # GVCF, tagged with the same EDAM id (format_3016) that the VCF inputs use
80     label: GVCF generated from GATK Haplotype Caller
81   report:
82     type: File  # single file from the generate-report step
83     outputSource: generate-report/report
84     format: edam:format_2331 # HTML
85     label: ClinVar variant report
86
87 steps: # One chain (bwamem-samtools-view -> samtools-sort -> mark-duplicates -> samtools-index -> haplotypecaller -> generate-report) plus an independent fastqc branch
88   fastqc: # depends only on workflow inputs; its outputs go straight to qc-html / qc-zip
89     run: fastqc.cwl
90     in:
91       fastq1: fastq1
92       fastq2: fastq2
93     out: [out-html, out-zip]
94   bwamem-samtools-view: # first step of the chain; takes both FASTQs, the reference and the sample name
95     run: bwamem-samtools-view.cwl
96     in:
97       fastq1: fastq1
98       fastq2: fastq2
99       reference: reference
100       sample: sample
101     out: [bam]
102   samtools-sort:
103     run: samtools-sort.cwl
104     in:
105       bam: bwamem-samtools-view/bam # output of the previous step
106       sample: sample
107     out: [sortedbam]
108   mark-duplicates:
109     run: mark-duplicates.cwl
110     in:
111       bam: samtools-sort/sortedbam # output of the previous step
112     out: [dupbam,dupmetrics] # dupmetrics is not referenced by any later step or workflow output in this file
113   samtools-index:
114     run: samtools-index.cwl
115     in:
116       bam: mark-duplicates/dupbam # output of the previous step
117     out: [indexedbam]
118   haplotypecaller:
119     run: scatter-gatk-wf-with-interval.cwl # presumably a sub-workflow, judging by the file name — confirm
120     in:
121       reference: reference
122       fullintervallist: fullintervallist
123       bam: samtools-index/indexedbam # NOTE(review): presumably carries its .bai index as a secondary file — confirm in samtools-index.cwl
124       sample: sample
125       scattercount: scattercount
126       knownsites1: knownsites1
127       knownsites2: knownsites2
128     out: [gatheredgvcf] # feeds both the gvcf workflow output and the generate-report step
129   generate-report:
130     run: report-wf.cwl # presumably a sub-workflow, judging by the file name — confirm
131     in:
132       gvcf: haplotypecaller/gatheredgvcf # output of the previous step
133       sample: sample
134       clinvarvcf: clinvarvcf
135       reportfunc: reportfunc
136       headhtml: headhtml
137       tailhtml: tailhtml
138     out: [report] # exposed as the report workflow output
139
140 s:codeRepository: https://github.com/arvados/arvados-tutorial
141 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
142
143 $namespaces: # prefixes for the s:* metadata keys and the edam:format_* identifiers used in this file
144  s: https://schema.org/
145  edam: http://edamontology.org/
146
147 #$schemas:
148 # - https://schema.org/version/latest/schema.rdf
149 # - http://edamontology.org/EDAM_1.18.owl
149 # - http://edamontology.org/EDAM_1.18.owl