Updating to allow for two VCFs of known sites
[arvados-tutorial.git] / WGS-processing / cwl / wgs-processing-wf.cwl
1 cwlVersion: v1.1
2 class: Workflow  # top-level workflow: a getfastq step feeding a scattered bwamem-gatk-report step
3 label: WGS processing workflow scattered over samples
4
5 requirements:
6   - class: SubworkflowFeatureRequirement  # the bwamem-gatk-report step runs helper/bwamem-gatk-report-wf.cwl, presumably itself a Workflow — confirm
7   - class: ScatterFeatureRequirement  # required by the `scatter` declared on the bwamem-gatk-report step
8
9 inputs:  # workflow-level inputs; every one except fastqdir is forwarded unchanged to the bwamem-gatk-report step
10   fastqdir:  # consumed only by the getfastq step
11     type: Directory 
12     label: Directory of paired FASTQ files
13   reference:
14     type: File
15     format: edam:format_1929 # FASTA
16     label: Reference genome 
17     secondaryFiles:  # companion files that must sit next to the reference file
18       - .amb  # plain suffixes are appended to the primary file name; .amb/.ann/.bwt/.pac/.sa are presumably BWA index files — confirm
19       - .ann
20       - .bwt
21       - .pac
22       - .sa
23       - .fai
24       - ^.dict  # leading ^ removes the primary file's last extension before appending .dict
25   fullintervallist:  # no label or format declared; forwarded as-is to bwamem-gatk-report
26     type: File
27   knownsites1:
28     type: File
29     format: edam:format_3016 # VCF
30     label: VCF of known SNPS sites for BQSR
31     secondaryFiles:
32       - .idx   # index expected beside the VCF as <name>.idx
33   knownsites2:
34     type: File
35     format: edam:format_3016 # VCF
36     label: VCF of known indel sites for BQSR
37     secondaryFiles:
38       - .tbi  # index suffix differs from knownsites1 (.tbi rather than .idx)
39   scattercount: 
40     type: string  # declared as a string, not an int; forwarded as-is to the sub-workflow
41     label: Desired split for variant calling
42   clinvarvcf: 
43     type: File
44     format: edam:format_3016 # VCF
45     label: Reference VCF for ClinVar
46   reportfunc: 
47     type: File  # no format declared for this file
48     label: Function used to create HTML report
49   headhtml: 
50     type: File
51     format: edam:format_2331 # HTML 
52     label: Header for HTML report
53   tailhtml: 
54     type: File
55     format: edam:format_2331 # HTML
56     label: Footer for HTML report
57
58 outputs:  # both outputs are arrays collected from the scattered bwamem-gatk-report step
59   gvcf:
60     type: File[]
61     outputSource: bwamem-gatk-report/gvcf
62     format: edam:format_3016 # GVCF (same EDAM term that the inputs in this file annotate as VCF)
63     label: GVCFs generated from GATK 
64   report:
65     type: File[]  
66     outputSource: bwamem-gatk-report/report
67     format: edam:format_2331 # HTML
68     label: ClinVar variant reports 
69
70 steps:
71   getfastq:  # takes the FASTQ directory and exposes fastq1, fastq2 and sample, which feed the scatter below
72     run: ./helper/getfastq.cwl
73     in:
74       fastqdir: fastqdir
75     out: [fastq1, fastq2, sample]
76
77   bwamem-gatk-report:  # invoked once per (fastq1, fastq2, sample) combination; all other inputs are shared by every invocation
78     run: ./helper/bwamem-gatk-report-wf.cwl
79     scatter: [fastq1, fastq2, sample]
80     scatterMethod: dotproduct  # pairs the i-th element of each scattered input, so the three arrays must have equal length
81     in:
82       fastq1: getfastq/fastq1
83       fastq2: getfastq/fastq2
84       reference: reference
85       fullintervallist: fullintervallist
86       sample: getfastq/sample
87       knownsites1: knownsites1
88       knownsites2: knownsites2
89       scattercount: scattercount
90       clinvarvcf: clinvarvcf
91       reportfunc: reportfunc
92       headhtml: headhtml
93       tailhtml: tailhtml
94     out: [qc-html,qc-zip,gvcf,report]  # only gvcf and report are referenced by the workflow outputs in this file
95
96 s:codeRepository: https://github.com/arvados/arvados-tutorial  # schema.org metadata; the s: prefix is defined under $namespaces
97 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
98
99 $namespaces:  # prefix definitions for the s: and edam: terms used in this file
100  s: https://schema.org/
101  edam: http://edamontology.org/
102
103 #$schemas:
104 # - https://schema.org/version/latest/schema.rdf
105 # - http://edamontology.org/EDAM_1.18.owl