Updating cwl and yml for intervals and other changes
[arvados-tutorial.git] / WGS-processing / cwl / helper / bwamem-gatk-report-wf.cwl
1 cwlVersion: v1.1  # CWL document describing the single-sample WGS pipeline wired together under `steps`
2 class: Workflow  # a multi-step workflow, not a single command-line tool
3 label: WGS processing workflow for single sample
4
5 requirements:
6   SubworkflowFeatureRequirement: {}  # enables steps whose `run` target is itself a Workflow; presumably needed for the two *-wf.cwl steps (haplotypecaller, generate-report) — confirm against those files
7
8 inputs:  # workflow-level parameters; each one is forwarded to one or more entries under `steps`
9   fastq1:  # consumed by the fastqc and bwamem-samtools-view steps
10     type: File
11     format: edam:format_1930 # FASTQ
12     label: One of set of pair-end FASTQs (R1)
13   fastq2:  # consumed by the fastqc and bwamem-samtools-view steps
14     type: File
15     format: edam:format_1930 # FASTQ
16     label: One of set of pair-end FASTQs (R2)
17   reference:  # consumed by the bwamem-samtools-view and haplotypecaller steps
18     type: File
19     format: edam:format_1929 # FASTA
20     label: Reference genome
21     secondaryFiles:  # companion files staged alongside the FASTA; each pattern is applied to the FASTA file name
22       - .amb  # .amb/.ann/.bwt/.pac/.sa: presumably the BWA index set — confirm against bwamem-samtools-view.cwl
23       - .ann
24       - .bwt
25       - .pac
26       - .sa
27       - .fai  # presumably the FASTA index
28       - ^.dict  # leading ^ removes the FASTA's own extension before .dict is appended
29   fullintervallist:  # forwarded as-is to the haplotypecaller step; no `format` is declared for this file
30     type: File
31     label: Full list of intervals to operate over
32   sample: # consumed by bwamem-samtools-view, samtools-sort, haplotypecaller and generate-report
33     type: string
34     label: Sample Name
35   knownsites:  # consumed by the haplotypecaller step
36     type: File
37     format: edam:format_3016 # VCF
38     label: VCF of known polymorphic sites for BQSR
39     secondaryFiles:
40       - .tbi   # companion index expected next to the VCF (suffix appended to the VCF file name)
41   scattercount: # consumed by the haplotypecaller step
42     type: string  # NOTE(review): a count typed as string — confirm scatter-gatk-wf-with-interval.cwl expects a string here
43     label: Desired split for variant calling
44   clinvarvcf:  # consumed by the generate-report step
45     type: File
46     format: edam:format_3016 # VCF
47     label: Reference VCF for ClinVar
48   reportfunc:  # consumed by the generate-report step; no `format` is declared for this file
49     type: File
50     label: Function used to create HTML report
51   headhtml:  # consumed by the generate-report step
52     type: File
53     format: edam:format_2331 # HTML
54     label: Header for HTML report
55   tailhtml:  # consumed by the generate-report step
56     type: File
57     format: edam:format_2331 # HTML
58     label: Footer for HTML report
59
60 outputs:  # results exposed by the workflow; each `outputSource` is written as <step>/<step output>
61   qc-html:
62     type: File[]  # array of report files collected from the fastqc step
63     label: FASTQ QC reports
64     format: edam:format_2331 # HTML
65     outputSource: fastqc/out-html
66   qc-zip:  # no `format` is declared for the zip archives
67     type: File[]
68     label: Zip files of FASTQ QC report and associated data
69     outputSource: fastqc/out-zip # second output of the same fastqc step that feeds qc-html
70   gvcf:
71     type: File
72     outputSource: haplotypecaller/gatheredgvcf
73     format: edam:format_3016 # same EDAM term that the inputs annotate as VCF; per the label, the content is a GVCF
74     label: GVCF generated from GATK Haplotype Caller
75   report:
76     type: File  # a single file, unlike the File[] QC outputs
77     outputSource: generate-report/report
78     format: edam:format_2331 # HTML
79     label: ClinVar variant report
80
81 steps:  # fastqc reads only workflow inputs; the remaining steps form one chain: bwamem-samtools-view -> samtools-sort -> mark-duplicates -> samtools-index -> haplotypecaller -> generate-report
82   fastqc:  # feeds the qc-html and qc-zip workflow outputs; no other step uses its results
83     run: fastqc.cwl
84     in:
85       fastq1: fastq1
86       fastq2: fastq2
87     out: [out-html, out-zip]
88   bwamem-samtools-view:  # head of the chain: takes both FASTQs, the reference and the sample name, and emits `bam`
89     run: bwamem-samtools-view.cwl
90     in:
91       fastq1: fastq1
92       fastq2: fastq2
93       reference: reference
94       sample: sample
95     out: [bam]
96   samtools-sort:
97     run: samtools-sort.cwl
98     in:
99       bam: bwamem-samtools-view/bam  # output of the previous step
100       sample: sample
101     out: [sortedbam]
102   mark-duplicates:
103     run: mark-duplicates.cwl
104     in:
105       bam: samtools-sort/sortedbam
106     out: [dupbam,dupmetrics]  # only dupbam is used downstream; dupmetrics is not referenced by any step or workflow output in this file
107   samtools-index:
108     run: samtools-index.cwl
109     in:
110       bam: mark-duplicates/dupbam
111     out: [indexedbam]
112   haplotypecaller:  # produces the workflow's `gvcf` output, which is also the input of generate-report
113     run: scatter-gatk-wf-with-interval.cwl # presumably a nested workflow (see SubworkflowFeatureRequirement) — confirm against that file
114     in:
115       reference: reference
116       fullintervallist: fullintervallist
117       bam: samtools-index/indexedbam
118       sample: sample
119       scattercount: scattercount
120       knownsites: knownsites
121     out: [gatheredgvcf]
122   generate-report:  # last step of the chain; produces the workflow's `report` output
123     run: report-wf.cwl
124     in:
125       gvcf: haplotypecaller/gatheredgvcf
126       sample: sample
127       clinvarvcf: clinvarvcf
128       reportfunc: reportfunc
129       headhtml: headhtml
130       tailhtml: tailhtml
131     out: [report]
132
133 s:codeRepository: https://github.com/arvados/arvados-tutorial  # metadata annotation; the `s:` prefix is expanded via $namespaces
134 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html  # metadata annotation using the same `s:` prefix
135
136 $namespaces:  # prefix-to-IRI expansions for the `s:` and `edam:` names used throughout this document
137  s: https://schema.org/
138  edam: http://edamontology.org/
139
140 #$schemas:  (left commented out, so the two schema documents below are not loaded)
141 # - https://schema.org/version/latest/schema.rdf
142 # - http://edamontology.org/EDAM_1.18.owl