+Arvados demo showing processing of whole genome sequencing (WGS) data. The workflow includes:
+* Local alignment using BWA-MEM
+* Variant calling in parallel using GATK
+* Generation of an HTML report comparing the called variants with the ClinVar public archive
+Workflows are written in the Common Workflow Language (CWL).
+ arv: "http://arvados.org/cwl#"
+ cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool  # collects the *_1.fastq.gz / *_2.fastq.gz entries of a directory into two sorted File arrays
+cwlVersion: v1.1
+label: Create arrays of FASTQ files to process
+ InlineJavascriptRequirement: {}  # the expression below is a JavaScript ${...} body
+ fastjdir:  # directory whose listing the expression scans
+ type: Directory
+ label: Input directory of FASTQ files
+ loadListing: 'shallow_listing'  # the expression reads inputs.fastjdir.listing, so the immediate entries must be loaded
+ fastq1:  # entries whose basename ends in _1.fastq.gz, sorted by basename
+ type: File[]
+ fastq2:  # entries whose basename ends in _2.fastq.gz, sorted by basename
+ type: File[]
+expression: |
+  ${
+    // Split the listing of inputs.fastjdir into two arrays of File objects:
+    // entries whose basename ends in "_1.fastq.gz" and entries whose basename
+    // ends in "_2.fastq.gz". Each array is sorted by basename and returned
+    // as the "fastq1" / "fastq2" outputs.
+    //
+    // Kept to ECMAScript 5 syntax (no endsWith, const/let or arrow functions)
+    // to match the rest of this expression.
+    //
+    // True when `text` ends with `suffix`. A plain indexOf() test would also
+    // accept names that merely contain the marker, e.g. "x_1.fastq.gz.md5".
+    function hasSuffix(text, suffix) {
+      return text.length >= suffix.length &&
+        text.slice(-suffix.length) === suffix;
+    }
+    // Comparator ordering listing entries by ascending basename.
+    function compare(a, b) {
+      if (a.basename > b.basename) {
+        return 1;
+      }
+      if (a.basename < b.basename) {
+        return -1;
+      }
+      return 0;
+    }
+    var fastq1 = [];
+    var fastq2 = [];
+    var listing = inputs.fastjdir.listing;
+    for (var i = 0; i < listing.length; i++) {
+      var entry = listing[i];
+      // A basename cannot end in both suffixes, so the branches are exclusive.
+      if (hasSuffix(entry.basename, '_1.fastq.gz')) {
+        fastq1.push(entry);
+      } else if (hasSuffix(entry.basename, '_2.fastq.gz')) {
+        fastq2.push(entry);
+      }
+    }
+    // Both arrays are sorted with the same comparator; presumably consumers
+    // pair fastq1[i] with fastq2[i], which assumes every _1 file has a
+    // matching _2 file -- confirm against the caller.
+    fastq1.sort(compare);
+    fastq2.sort(compare);
+    return {"fastq1": fastq1, "fastq2": fastq2};
+  }
+cwlVersion: v1.1
+class: Workflow  # chains the fastqc, bwamem-samtools-view, samtools-sort, mark-duplicates, samtools-index, haplotypecaller and generate-report steps
+ - class: SubworkflowFeatureRequirement  # presumably needed because some steps run *-wf.cwl workflows — confirm
+ fastq1: File  # fed to the fastqc and bwamem-samtools-view steps
+ fastq2: File  # fed to the fastqc and bwamem-samtools-view steps
+ reference:  # used by the bwamem-samtools-view and haplotypecaller steps
+ type: File
+ secondaryFiles:  # companion files expected next to the reference file
+ - .amb
+ - .ann
+ - .bwt
+ - .pac
+ - .sa
+ - .fai
+ - ^.dict  # leading ^ drops the reference's last extension before appending .dict
+ sample: string  # forwarded to bwamem-samtools-view, samtools-sort, haplotypecaller and (as samplename) generate-report
+ knownsites:  # wired to haplotypecaller's knownsites1 input
+ type: File
+ secondaryFiles:
+ - .tbi
+ scattercount: string  # NOTE(review): forwarded to haplotypecaller as a string, not an int — confirm the sub-workflow expects that
+ clinvarvcf: File  # this and the three inputs below are only consumed by generate-report
+ reportfunc: File
+ headhtml: File
+ tailhtml: File
+ qc-html:  # files from the fastqc step's out-html output
+ type: File[]
+ outputSource: fastqc/out-html
+ qc-zip:  # files from the fastqc step's out-zip output
+ type: File[]
+ outputSource: fastqc/out-zip
+ gvcf:  # single file produced by the haplotypecaller step as gatheredgvcf
+ type: File
+ outputSource: haplotypecaller/gatheredgvcf
+ report:  # single file produced by the generate-report step
+ type: File
+ outputSource: generate-report/report
+ fastqc:  # runs fastqc.cwl on both FASTQ inputs; feeds the qc-html / qc-zip workflow outputs
+ run: fastqc.cwl
+ in:
+ fastq1: fastq1
+ fastq2: fastq2
+ out: [out-html, out-zip]
+ bwamem-samtools-view:  # runs bwamem-samtools-view.cwl on the FASTQ inputs, reference and sample name; emits bam
+ run: bwamem-samtools-view.cwl
+ in:
+ fastq1: fastq1
+ fastq2: fastq2
+ reference: reference
+ sample: sample
+ out: [bam]
+ samtools-sort:  # consumes bwamem-samtools-view/bam; emits sortedbam
+ run: samtools-sort.cwl
+ in:
+ bam: bwamem-samtools-view/bam
+ sample: sample
+ out: [sortedbam]
+ mark-duplicates:  # consumes samtools-sort/sortedbam; emits dupbam and dupmetrics
+ run: mark-duplicates.cwl
+ in:
+ bam: samtools-sort/sortedbam
+ out: [dupbam,dupmetrics]  # dupmetrics is not referenced by any other step or output visible in this file
+ samtools-index:  # consumes mark-duplicates/dupbam; emits indexedbam
+ run: samtools-index.cwl
+ in:
+ bam: mark-duplicates/dupbam
+ out: [indexedbam]
+ haplotypecaller:  # runs scatter-gatk-wf-with-interval.cwl on samtools-index/indexedbam; emits gatheredgvcf
+ run: scatter-gatk-wf-with-interval.cwl
+ in:
+ reference: reference
+ bam: samtools-index/indexedbam
+ sample: sample
+ scattercount: scattercount
+ knownsites1: knownsites  # workflow input knownsites is bound to the step input named knownsites1
+ out: [gatheredgvcf]
+ generate-report:  # runs report-wf.cwl on haplotypecaller/gatheredgvcf plus the ClinVar and HTML inputs; emits report
+ run: report-wf.cwl
+ in:
+ gvcf: haplotypecaller/gatheredgvcf
+ samplename: sample  # workflow input sample is bound to the step input named samplename
+ clinvarvcf: clinvarvcf
+ reportfunc: reportfunc
+ headhtml: headhtml
+ tailhtml: tailhtml
+ out: [report]