Removing white spaces and commenting schemas
[arvados-tutorial.git] / WGS-processing / cwl / helper / bwamem-gatk-report-wf.cwl
# Top-level CWL document header: a v1.1 Workflow that processes one sample.
cwlVersion: v1.1
class: Workflow
label: WGS processing workflow for single sample

requirements:
  # Permits a step's `run` target to itself be a Workflow document.
  SubworkflowFeatureRequirement: {}
7
# Workflow inputs. `format` values are EDAM ontology terms (the `edam:` prefix
# is declared under $namespaces); the trailing comment names the format.
inputs:
  # Paired-end reads: R1 and R2 are supplied as two separate files.
  fastq1:
    label: One of set of pair-end FASTQs (R1)
    type: File
    format: edam:format_1930 # FASTQ
  fastq2:
    label: One of set of pair-end FASTQs (R2)
    type: File
    format: edam:format_1930 # FASTQ

  reference:
    label: Reference genome
    type: File
    format: edam:format_1929 # FASTA
    # Companion files that must sit next to the FASTA. A plain entry is
    # appended to the file name; a leading `^` first strips the FASTA's own
    # extension, so `^.dict` resolves to <basename>.dict.
    secondaryFiles: [".amb", ".ann", ".bwt", ".pac", ".sa", ".fai", "^.dict"]

  sample:
    label: Sample Name
    type: string

  knownsites:
    label: VCF of known polymorphic sites for BQSR
    type: File
    format: edam:format_3016 # VCF
    # Index expected alongside the VCF.
    secondaryFiles: [".tbi"]

  # NOTE(review): typed as a string although the label describes a split
  # count — confirm this matches what scatter-gatk-wf-with-interval.cwl expects.
  scattercount:
    label: Desired split for variant calling
    type: string

  clinvarvcf:
    label: Reference VCF for ClinVar
    type: File
    format: edam:format_3016 # VCF

  # Report-building assets; `reportfunc` declares no `format`.
  reportfunc:
    label: Function used to create HTML report
    type: File
  headhtml:
    label: Header for HTML report
    type: File
    format: edam:format_1964 # HTML
  tailhtml:
    label: Footer for HTML report
    type: File
    format: edam:format_1964 # HTML
56
# Workflow outputs. Each `outputSource` is written as <step id>/<step output id>.
outputs:
  # Both FastQC outputs are arrays of files.
  qc-html:
    label: FASTQ QC reports
    type: File[]
    format: edam:format_1964 # HTML
    outputSource: fastqc/out-html
  qc-zip:
    label: Zip files of FASTQ QC report and associated data
    type: File[]
    outputSource: fastqc/out-zip

  gvcf:
    label: GVCF generated from GATK Haplotype Caller
    type: File
    format: edam:format_3016 # VCF term (same one the VCF inputs use); the file is a GVCF
    outputSource: haplotypecaller/gatheredgvcf

  report:
    label: ClinVar variant report
    type: File
    format: edam:format_1964 # HTML
    outputSource: generate-report/report
77
# Step graph. In each `in` map the key is the input id of the `run` target and
# the value is its source: a workflow input or <step id>/<output id>.
steps:
  # QC on the raw reads; independent of the alignment chain below.
  fastqc:
    run: fastqc.cwl
    in:
      fastq1: fastq1
      fastq2: fastq2
    out:
      - out-html
      - out-zip

  # Linear BAM chain: each step consumes the previous step's BAM output.
  bwamem-samtools-view:
    run: bwamem-samtools-view.cwl
    in:
      fastq1: fastq1
      fastq2: fastq2
      reference: reference
      sample: sample
    out:
      - bam

  samtools-sort:
    run: samtools-sort.cwl
    in:
      bam: bwamem-samtools-view/bam
      sample: sample
    out:
      - sortedbam

  mark-duplicates:
    run: mark-duplicates.cwl
    in:
      bam: samtools-sort/sortedbam
    # NOTE(review): dupmetrics is declared here but is not wired to any later
    # step or workflow output in this file.
    out:
      - dupbam
      - dupmetrics

  samtools-index:
    run: samtools-index.cwl
    in:
      bam: mark-duplicates/dupbam
    out:
      - indexedbam

  # Consumes the indexed BAM and yields the single gathered GVCF that is
  # exported as the `gvcf` workflow output.
  haplotypecaller:
    run: scatter-gatk-wf-with-interval.cwl
    in:
      reference: reference
      bam: samtools-index/indexedbam
      sample: sample
      scattercount: scattercount
      knownsites: knownsites
    out:
      - gatheredgvcf

  # Builds the report from the GVCF plus the ClinVar VCF and HTML assets.
  # Note the target names its input `samplename`, fed from workflow input `sample`.
  generate-report:
    run: report-wf.cwl
    in:
      gvcf: haplotypecaller/gatheredgvcf
      samplename: sample
      clinvarvcf: clinvarvcf
      reportfunc: reportfunc
      headhtml: headhtml
      tailhtml: tailhtml
    out:
      - report
128
# Provenance metadata, expressed with schema.org terms via the `s:` prefix.
s:codeRepository: https://github.com/arvados/arvados-tutorial
s:license: https://www.gnu.org/licenses/agpl-3.0.en.html

# Prefixes used above: `s:` for metadata fields, `edam:` for File `format` terms.
$namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/

# Ontology documents a CWL runner may load to resolve the prefixed terms.
# schema.org retired the old `version/latest/schema.rdf` download path; the
# current RDF/XML dump is published as `schemaorg-current-https.rdf`, whose
# IRIs use the same https:// base as the `s:` namespace above.
$schemas:
  - https://schema.org/version/latest/schemaorg-current-https.rdf
  - http://edamontology.org/EDAM_1.18.owl