From 89ac9b0dfb818c02b8fa1d6862820dcbdae14e26 Mon Sep 17 00:00:00 2001 From: Sarah Wait Zaranek Date: Wed, 17 Jun 2020 16:47:27 +0000 Subject: [PATCH] Adding readme and clean up Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek no issue # --- WGS-processing/README | 7 ++ WGS-processing/cwl/getfastq.cwl | 49 ++++++++++ .../{ => not-in-use}/gather-vcf-try2.cwl | 0 .../{ => not-in-use}/gatk-applyBQSR.cwl | 0 .../gatk-baserecalibrator.cwl | 0 .../{ => not-in-use}/gatk-haplotypecaller.cwl | 0 WGS-processing/cwl/wgs-processing-wf.cwl | 93 +++++++++++++++++++ 7 files changed, 149 insertions(+) create mode 100644 WGS-processing/README create mode 100644 WGS-processing/cwl/getfastq.cwl rename WGS-processing/cwl/helper/{ => not-in-use}/gather-vcf-try2.cwl (100%) rename WGS-processing/cwl/helper/{ => not-in-use}/gatk-applyBQSR.cwl (100%) rename WGS-processing/cwl/helper/{ => not-in-use}/gatk-baserecalibrator.cwl (100%) rename WGS-processing/cwl/helper/{ => not-in-use}/gatk-haplotypecaller.cwl (100%) create mode 100644 WGS-processing/cwl/wgs-processing-wf.cwl diff --git a/WGS-processing/README b/WGS-processing/README new file mode 100644 index 0000000..2b5cded --- /dev/null +++ b/WGS-processing/README @@ -0,0 +1,7 @@ +Arvados demo showing processing of whole genome sequencing (WGS) data. 
The workflow includes: + +* Local alignment using BWA-MEM +* Variant calling in parallel using GATK +* Generation of HTML Report showing Comparison of Variants with ClinVar Public Archive + +Workflows are written in CWL diff --git a/WGS-processing/cwl/getfastq.cwl b/WGS-processing/cwl/getfastq.cwl new file mode 100644 index 0000000..a38bc91 --- /dev/null +++ b/WGS-processing/cwl/getfastq.cwl @@ -0,0 +1,49 @@ +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +class: ExpressionTool +cwlVersion: v1.1 +label: Create sorted arrays of _1 and _2 FASTQ files to process +requirements: + InlineJavascriptRequirement: {} +inputs: + fastjdir: + type: Directory + label: Input directory containing _1.fastq.gz and _2.fastq.gz read files + loadListing: 'shallow_listing' +outputs: + fastq1: + type: File[] + fastq2: + type: File[] +expression: | + ${function compare(a, b) { + var baseA = a.basename; + var baseB = b.basename; + // Order two File objects by basename using plain string comparison. + var comparison = 0; + if (baseA > baseB) { + comparison = 1; + } else if (baseA < baseB) { + comparison = -1; + } + return comparison; + } + function hasSuffix(s, suffix) { return s.slice(-suffix.length) === suffix; } + var fastq1 = []; + var fastq2 = []; + for (var i = 0; i < inputs.fastjdir.listing.length; i++) { + var name = inputs.fastjdir.listing[i]; + if (hasSuffix(name.basename, '_1.fastq.gz')) { + fastq1.push(name); + } + if (hasSuffix(name.basename, '_2.fastq.gz')) { + fastq2.push(name); + } + } + // Sort each list by basename so the output order is deterministic. + fastq1 = fastq1.sort(compare); + fastq2 = fastq2.sort(compare); + + return {"fastq1": fastq1, "fastq2": fastq2}; + } diff --git a/WGS-processing/cwl/helper/gather-vcf-try2.cwl b/WGS-processing/cwl/helper/not-in-use/gather-vcf-try2.cwl similarity index 100% rename from WGS-processing/cwl/helper/gather-vcf-try2.cwl rename to WGS-processing/cwl/helper/not-in-use/gather-vcf-try2.cwl diff --git a/WGS-processing/cwl/helper/gatk-applyBQSR.cwl b/WGS-processing/cwl/helper/not-in-use/gatk-applyBQSR.cwl similarity index 100% rename from WGS-processing/cwl/helper/gatk-applyBQSR.cwl rename to WGS-processing/cwl/helper/not-in-use/gatk-applyBQSR.cwl diff --git 
a/WGS-processing/cwl/helper/gatk-baserecalibrator.cwl b/WGS-processing/cwl/helper/not-in-use/gatk-baserecalibrator.cwl similarity index 100% rename from WGS-processing/cwl/helper/gatk-baserecalibrator.cwl rename to WGS-processing/cwl/helper/not-in-use/gatk-baserecalibrator.cwl diff --git a/WGS-processing/cwl/helper/gatk-haplotypecaller.cwl b/WGS-processing/cwl/helper/not-in-use/gatk-haplotypecaller.cwl similarity index 100% rename from WGS-processing/cwl/helper/gatk-haplotypecaller.cwl rename to WGS-processing/cwl/helper/not-in-use/gatk-haplotypecaller.cwl diff --git a/WGS-processing/cwl/wgs-processing-wf.cwl b/WGS-processing/cwl/wgs-processing-wf.cwl new file mode 100644 index 0000000..0a4b9d2 --- /dev/null +++ b/WGS-processing/cwl/wgs-processing-wf.cwl @@ -0,0 +1,93 @@ +cwlVersion: v1.1 +class: Workflow + +requirements: + - class: SubworkflowFeatureRequirement + +inputs: + fastq1: File + fastq2: File + reference: + type: File + secondaryFiles: + - .amb + - .ann + - .bwt + - .pac + - .sa + - .fai + - ^.dict + sample: string + knownsites: + type: File + secondaryFiles: + - .tbi + scattercount: string + clinvarvcf: File + reportfunc: File + headhtml: File + tailhtml: File + +outputs: + qc-html: + type: File[] + outputSource: fastqc/out-html + qc-zip: + type: File[] + outputSource: fastqc/out-zip + gvcf: + type: File + outputSource: haplotypecaller/gatheredgvcf + report: + type: File + outputSource: generate-report/report +steps: + fastqc: + run: fastqc.cwl + in: + fastq1: fastq1 + fastq2: fastq2 + out: [out-html, out-zip] + bwamem-samtools-view: + run: bwamem-samtools-view.cwl + in: + fastq1: fastq1 + fastq2: fastq2 + reference: reference + sample: sample + out: [bam] + samtools-sort: + run: samtools-sort.cwl + in: + bam: bwamem-samtools-view/bam + sample: sample + out: [sortedbam] + mark-duplicates: + run: mark-duplicates.cwl + in: + bam: samtools-sort/sortedbam + out: [dupbam,dupmetrics] + samtools-index: + run: samtools-index.cwl + in: + bam: 
mark-duplicates/dupbam + out: [indexedbam] + haplotypecaller: + run: scatter-gatk-wf-with-interval.cwl + in: + reference: reference + bam: samtools-index/indexedbam + sample: sample + scattercount: scattercount + knownsites1: knownsites + out: [gatheredgvcf] + generate-report: + run: report-wf.cwl + in: + gvcf: haplotypecaller/gatheredgvcf + samplename: sample + clinvarvcf: clinvarvcf + reportfunc: reportfunc + headhtml: headhtml + tailhtml: tailhtml + out: [report] -- 2.30.2