updating for metadata and yml to check command line cwl
[arvados-tutorial.git] / WGS-processing / cwl / wgs-processing-wf.cwl
1 cwlVersion: v1.1  # CWL specification version this document targets
2 class: Workflow  # this document is a multi-step workflow (see `steps`)
3 label: WGS processing workflow scattered over samples
4
5 requirements:  # optional CWL features the runner must support for this workflow
6   - class: SubworkflowFeatureRequirement  # needed when a step's `run` is itself a Workflow (presumably helper/bwamem-gatk-report-wf.cwl; confirm)
7   - class: ScatterFeatureRequirement  # needed for the `scatter` declared on the bwamem-gatk-report step
8
9 inputs:  # workflow-level parameters; each is wired into a step under `steps`
10   fastqdir:
11     type: Directory
12     label: Directory of paired FASTQ files
13   reference:
14     type: File
15     format: edam:format_1929 # FASTA
16     label: Reference genome
17     secondaryFiles:  # companion files that must sit next to the reference (presumably aligner/faidx/dict indexes; confirm)
18       - .amb
19       - .ann
20       - .bwt
21       - .pac
22       - .sa
23       - .fai
24       - ^.dict  # leading ^ removes the primary file's extension before appending .dict
25   knownsites:
26     type: File
27     format: edam:format_3016 # VCF
28     label: VCF of known polymorphic sites for BQSR
29     secondaryFiles:
30       - .tbi
31   scattercount:
32     type: string  # forwarded unchanged to the bwamem-gatk-report step
33     label: Desired split for variant calling
34   clinvarvcf:
35     type: File
36     format: edam:format_3016 # VCF
37     label: Reference VCF for ClinVar
38   reportfunc:
39     type: File
40     label: Function used to create HTML report
41   headhtml:
42     type: File
43     format: edam:format_2331 # HTML
44     label: Header for HTML report
45   tailhtml:
46     type: File
47     format: edam:format_2331 # HTML
48     label: Footer for HTML report
49
50 outputs:  # results collected from the scattered bwamem-gatk-report step
51   gvcf:
52     type: File[]  # array because the producing step is scattered
53     outputSource: bwamem-gatk-report/gvcf
54     format: edam:format_3016 # GVCF (same EDAM VCF identifier used for the VCF inputs)
55     label: GVCFs generated from GATK
56   report:
57     type: File[]  # array because the producing step is scattered
58     outputSource: bwamem-gatk-report/report
59     format: edam:format_2331 # HTML (same identifier as the headhtml/tailhtml inputs)
60     label: ClinVar variant reports
61
62 steps:
63   getfastq:  # turns the input directory into the three values scattered over below
64     run: ./helper/getfastq.cwl
65     in:
66       fastqdir: fastqdir
67     out: [fastq1, fastq2, sample]  # all three feed the bwamem-gatk-report step
68
69   bwamem-gatk-report:  # runs once per (fastq1, fastq2, sample) triple
70     run: ./helper/bwamem-gatk-report-wf.cwl
71     scatter: [fastq1, fastq2, sample]  # inputs iterated over; every other input is passed as-is to each job
72     scatterMethod: dotproduct  # element i of each scattered array forms job i, so the arrays must have equal length
73     in:
74       fastq1: getfastq/fastq1
75       fastq2: getfastq/fastq2
76       reference: reference
77       sample: getfastq/sample
78       knownsites: knownsites
79       scattercount: scattercount
80       clinvarvcf: clinvarvcf
81       reportfunc: reportfunc
82       headhtml: headhtml
83       tailhtml: tailhtml
84     out: [qc-html,qc-zip,gvcf,report]  # only gvcf and report are exposed as workflow outputs
85
86 s:codeRepository: https://github.com/arvados/arvados-tutorial  # document-level annotation; the `s` prefix is declared under $namespaces
87 s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
88
89 $namespaces:  # prefix -> base IRI used to expand the `s:` and `edam:` identifiers in this file
90  s: https://schema.org/
91  edam: http://edamontology.org/
92
93 $schemas:  # RDF/OWL definitions for the vocabularies declared under $namespaces
94  - https://schema.org/version/latest/schemaorg-current-https.rdf
95  - http://edamontology.org/EDAM_1.18.owl