Updating cwl and yml for intervals and other changes
[arvados-tutorial.git] / WGS-processing / cwl / wgs-processing-wf.cwl
# Top-level CWL v1.1 Workflow document. Per its label, it runs WGS processing
# scattered over samples.
1 cwlVersion: v1.1
2 class: Workflow
3 label: WGS processing workflow scattered over samples
4
# Optional CWL features that the steps of this workflow rely on.
5 requirements:
# The bwamem-gatk-report step runs ./helper/bwamem-gatk-report-wf.cwl, which
# appears (from its "-wf" name) to be a Workflow itself - TODO confirm.
6   - class: SubworkflowFeatureRequirement
# The bwamem-gatk-report step uses `scatter` / `scatterMethod`.
7   - class: ScatterFeatureRequirement
8
# Workflow inputs. Only fastqdir is consumed by the getfastq step; every other
# input is passed through unchanged to the bwamem-gatk-report step.
9 inputs:
10   fastqdir:
11     type: Directory 
12     label: Directory of paired FASTQ files
13   reference:
14     type: File
15     format: edam:format_1929 # FASTA
16     label: Reference genome 
# Companion files that must sit next to the reference. A plain pattern is
# appended to the reference's file name; a leading `^` first removes the
# reference's own extension (so `^.dict` yields <basename>.dict).
# NOTE(review): .amb/.ann/.bwt/.pac/.sa look like a BWA index and .fai like a
# samtools FASTA index - confirm against the tools used in the sub-workflow.
17     secondaryFiles:
18       - .amb
19       - .ann
20       - .bwt
21       - .pac
22       - .sa
23       - .fai
24       - ^.dict
# NOTE(review): unlike the other File inputs, this one declares no label or
# format. Presumably an interval list for variant calling - confirm.
25   fullintervallist:
26     type: File
27   knownsites:
28     type: File
29     format: edam:format_3016 # VCF
30     label: VCF of known polymorphic sites for BQSR
# Requires an accompanying <file>.tbi next to the VCF.
31     secondaryFiles:
32       - .tbi   
# NOTE(review): typed as string although the label describes a split count;
# verify this matches the type expected by bwamem-gatk-report-wf.cwl.
33   scattercount: 
34     type: string
35     label: Desired split for variant calling
36   clinvarvcf: 
37     type: File
38     format: edam:format_3016 # VCF
39     label: Reference VCF for ClinVar
# The three inputs below are the pieces used to build the HTML report.
40   reportfunc: 
41     type: File
42     label: Function used to create HTML report
43   headhtml: 
44     type: File
45     format: edam:format_2331 # HTML 
46     label: Header for HTML report
47   tailhtml: 
48     type: File
49     format: edam:format_2331 # HTML
50     label: Footer for HTML report
51
# Workflow outputs. Both are arrays because their source step,
# bwamem-gatk-report, is scattered: one element per scatter job.
52 outputs:
53   gvcf:
54     type: File[]
55     outputSource: bwamem-gatk-report/gvcf
56     format: edam:format_3016 # GVCF
57     label: GVCFs generated from GATK 
58   report:
59     type: File[]  
60     outputSource: bwamem-gatk-report/report
61     format: edam:format_2331 # HTML
62     label: ClinVar variant reports 
63
# Two steps: getfastq turns the input directory into fastq1/fastq2/sample
# values, and bwamem-gatk-report is then scattered over those values.
64 steps:
65   getfastq:
66     run: ./helper/getfastq.cwl
67     in:
68       fastqdir: fastqdir
69     out: [fastq1, fastq2, sample]
70
71   bwamem-gatk-report:
72     run: ./helper/bwamem-gatk-report-wf.cwl
# dotproduct pairs the i-th element of fastq1, fastq2 and sample into one job,
# so the three arrays produced by getfastq must have the same length.
73     scatter: [fastq1, fastq2, sample]
74     scatterMethod: dotproduct
75     in:
# Scattered inputs come from getfastq; all other inputs are workflow-level
# values handed unchanged to every scatter job.
76       fastq1: getfastq/fastq1
77       fastq2: getfastq/fastq2
78       reference: reference
79       fullintervallist: fullintervallist
80       sample: getfastq/sample
81       knownsites: knownsites
82       scattercount: scattercount
83       clinvarvcf: clinvarvcf
84       reportfunc: reportfunc
85       headhtml: headhtml
86       tailhtml: tailhtml
# qc-html and qc-zip are exposed by the step but are not wired to any
# workflow-level output; only gvcf and report are.
87     out: [qc-html,qc-zip,gvcf,report]
88
# Document metadata; the `s:` prefix is defined under $namespaces.
89 s:codeRepository: https://github.com/arvados/arvados-tutorial
90 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
91
# Prefix definitions: `s:` is used by the metadata keys and `edam:` by the
# `format:` fields of the inputs and outputs.
# NOTE(review): the matching $schemas entries are commented out in this file,
# so format identifiers are presumably not checked against the ontologies -
# confirm this is intended.
92 $namespaces:
93  s: https://schema.org/
94  edam: http://edamontology.org/
95
96 #$schemas:
97 # - https://schema.org/version/latest/schema.rdf
98 # - http://edamontology.org/EDAM_1.18.owl