Updating to allow for two vcfs of known sites
[arvados-tutorial.git] / WGS-processing / cwl / helper / scatter-gatk-wf-with-interval.cwl
# CWL v1.1 Workflow document; its human-readable label is given below.
1 cwlVersion: v1.1
2 class: Workflow
3 label: Scattered variant calling workflow
4
# Optional CWL features that this workflow declares it depends on.
5 requirements:
#   Permits a step's `run` target to be a nested workflow.
#   NOTE(review): presumably required by gatk-wf-with-interval.cwl; confirm that file is a Workflow.
6   SubworkflowFeatureRequirement: {}
#   Required because the recal-haplotypecaller step declares `scatter: intervallist`.
7   ScatterFeatureRequirement: {}
8
# Workflow-level inputs. Each one is forwarded to one or more entries under `steps`.
9 inputs:
#   Alignment file handed to every scattered recal-haplotypecaller run.
10   bam:
11     type: File
12     format: edam:format_2572 # BAM
13     label: Indexed sorted BAM with labeled duplicates
#     The index is expected beside the BAM as <name>.bai.
14     secondaryFiles:
15       - .bai
#   Reference FASTA, used by all three steps (splitintervals, recal-haplotypecaller, gather-GVCFs).
16   reference:
17     type: File
18     format: edam:format_1929 # FASTA
19     label: Reference genome
#     Companion files that must sit next to the FASTA. Plain suffixes are appended to
#     the full file name; the leading `^` strips the FASTA's own extension first, so
#     `^.dict` resolves to <basename-without-extension>.dict.
20     secondaryFiles:
21       - .amb
22       - .ann
23       - .bwt
24       - .pac
25       - .sa
26       - .fai
27       - ^.dict
#   Complete interval list; consumed only by the splitintervals step.
28   fullintervallist:
29     type: File
30     label: Full list of intervals to operate over
#   Sample name string; passed unchanged to all three steps.
31   sample:
32     type: string
33     label: Sample Name
#   First known-sites VCF; passed to recal-haplotypecaller. Expects a <name>.idx index.
34   knownsites1:
35     type: File
36     format: edam:format_3016 # VCF
37     label: VCF of known polymorphic sites for BQSR
38     secondaryFiles:
39       - .idx
#   Second known-sites VCF; passed to recal-haplotypecaller. Expects a <name>.tbi index.
#   NOTE(review): the label is identical to knownsites1's, so the two inputs are told
#   apart only by name and by the index suffix they require.
40   knownsites2:
41     type: File
42     format: edam:format_3016 # VCF
43     label: VCF of known polymorphic sites for BQSR
44     secondaryFiles:
45       - .tbi
#   Requested number of splits; consumed only by the splitintervals step.
#   NOTE(review): a count typed as `string` - presumably this matches the input type
#   declared in gatk-splitintervals.cwl; confirm before changing it.
46   scattercount:
47     type: string
48     label: Desired split for variant calling
49
# Workflow-level outputs: a single gathered GVCF file.
50 outputs:
#   Taken directly from the `gatheredgvcf` output of the gather-GVCFs step.
51   gatheredgvcf:
52     type: File
#     Same EDAM identifier that the known-sites inputs annotate as VCF.
53     format: edam:format_3016 # GVCF
54     label: Gathered GVCF
#     An index is expected beside the output as <name>.tbi.
55     secondaryFiles:
56       - .tbi
57     outputSource: gather-GVCFs/gatheredgvcf
58     
# Three steps wired in sequence: split the interval list, run the per-interval
# sub-workflow once per split, then gather the per-interval GVCFs into one file.
59 steps:
#   Runs gatk-splitintervals.cwl on the full interval list and exposes the result as
#   `intervalfiles`, which the next step scatters over.
60   splitintervals:
61     run: gatk-splitintervals.cwl
62     in:
63       reference: reference
64       fullintervallist: fullintervallist
65       sample: sample
66       scattercount: scattercount
67     out: [intervalfiles]
68       
#   Runs gatk-wf-with-interval.cwl once per element of `intervallist` (scatter).
#   All other inputs are the same for every run; the `gvcf` output is therefore
#   collected as an array with one entry per interval file.
69   recal-haplotypecaller:
70     run: gatk-wf-with-interval.cwl
71     scatter: intervallist
72     in:
73       bam: bam
74       reference: reference
75       sample: sample
76       knownsites1: knownsites1
77       knownsites2: knownsites2
78       intervallist: splitintervals/intervalfiles
79     out: [gvcf]
80
#   Runs gather-array-vcf.cwl on the array of per-interval GVCFs and produces the
#   `gatheredgvcf` that the workflow publishes as its output.
81   gather-GVCFs:
82     run: gather-array-vcf.cwl
83     in:
84       gvcfarray: recal-haplotypecaller/gvcf
85       sample: sample
86       reference: reference
87     out: [gatheredgvcf]
88
# Document metadata expressed with schema.org terms (the `s:` prefix declared below).
89 s:codeRepository: https://github.com/arvados/arvados-tutorial
90 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
91
# Prefix declarations: `s:` is used by the metadata above, `edam:` by the `format`
# fields on the workflow's inputs and outputs.
92 $namespaces:
93  s: https://schema.org/
94  edam: http://edamontology.org/
95
# The schema imports below are commented out, so no schema files are loaded here.
# NOTE(review): presumably disabled to avoid fetching remote schemas; confirm intent.
96 #$schemas:
97 # - https://schema.org/version/latest/schema.rdf
98 # - http://edamontology.org/EDAM_1.18.owl