Adding readme and clean up
authorSarah Wait Zaranek <swz@curii.com>
Wed, 17 Jun 2020 16:47:27 +0000 (16:47 +0000)
committerWard Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:40 +0000 (11:16 -0400)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

WGS-processing/README [new file with mode: 0644]
WGS-processing/cwl/getfastq.cwl [new file with mode: 0644]
WGS-processing/cwl/helper/not-in-use/gather-vcf-try2.cwl [moved from WGS-processing/cwl/helper/gather-vcf-try2.cwl with 100% similarity]
WGS-processing/cwl/helper/not-in-use/gatk-applyBQSR.cwl [moved from WGS-processing/cwl/helper/gatk-applyBQSR.cwl with 100% similarity]
WGS-processing/cwl/helper/not-in-use/gatk-baserecalibrator.cwl [moved from WGS-processing/cwl/helper/gatk-baserecalibrator.cwl with 100% similarity]
WGS-processing/cwl/helper/not-in-use/gatk-haplotypecaller.cwl [moved from WGS-processing/cwl/helper/gatk-haplotypecaller.cwl with 100% similarity]
WGS-processing/cwl/wgs-processing-wf.cwl [new file with mode: 0644]

diff --git a/WGS-processing/README b/WGS-processing/README
new file mode 100644 (file)
index 0000000..2b5cded
--- /dev/null
@@ -0,0 +1,7 @@
+Arvados demo showing processing of whole genome sequencing (WGS) data. The workflow includes:
+
+* Local alignment using BWA-MEM
+* Variant calling in parallel using GATK
+* Generation of an HTML report showing a comparison of variants with the ClinVar public archive
+
+Workflows are written in CWL (Common Workflow Language).
diff --git a/WGS-processing/cwl/getfastq.cwl b/WGS-processing/cwl/getfastq.cwl
new file mode 100644 (file)
index 0000000..a38bc91
--- /dev/null
@@ -0,0 +1,49 @@
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool
+cwlVersion: v1.1
+label: Create sorted arrays of paired fastq files to process
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  fastjdir:
+    type: Directory
+    label: Input directory of fastq files
+    loadListing: 'shallow_listing'
+outputs:
+  fastq1:
+    type: File[]
+  fastq2:
+    type: File[]
+expression: |
+  ${
+    // Order listing entries by basename so both output arrays share one ordering.
+    function compare(a, b) {
+      if (a.basename > b.basename) { return 1; }
+      if (a.basename < b.basename) { return -1; }
+      return 0;
+    }
+
+    // True only when name ends with suffix (ES5-safe, no String.endsWith).
+    function hasSuffix(name, suffix) {
+      return name.slice(-suffix.length) === suffix;
+    }
+
+    var fastq1 = [];
+    var fastq2 = [];
+    for (var i = 0; i < inputs.fastjdir.listing.length; i++) {
+      var entry = inputs.fastjdir.listing[i];
+      // Keep only File entries whose basename ends with a read-pair suffix.
+      if (entry['class'] !== 'File') { continue; }
+      if (hasSuffix(entry.basename, '_1.fastq.gz')) {
+        fastq1.push(entry);
+      } else if (hasSuffix(entry.basename, '_2.fastq.gz')) {
+        fastq2.push(entry);
+      }
+    }
+
+    fastq1.sort(compare);
+    fastq2.sort(compare);
+    return {"fastq1": fastq1, "fastq2": fastq2};
+  }
diff --git a/WGS-processing/cwl/wgs-processing-wf.cwl b/WGS-processing/cwl/wgs-processing-wf.cwl
new file mode 100644 (file)
index 0000000..0a4b9d2
--- /dev/null
@@ -0,0 +1,93 @@
+cwlVersion: v1.1
+class: Workflow  # One fastq pair through: fastqc, bwamem-samtools-view, samtools-sort, mark-duplicates, samtools-index, haplotypecaller, generate-report
+
+requirements:
+  - class: SubworkflowFeatureRequirement  # presumably needed because scatter-gatk-wf-with-interval.cwl and report-wf.cwl are workflows themselves - confirm
+
+inputs:
+  fastq1: File  # fed to both the fastqc and bwamem-samtools-view steps
+  fastq2: File  # fed to both the fastqc and bwamem-samtools-view steps
+  reference:  # consumed by the bwamem-samtools-view and haplotypecaller steps
+    type: File
+    secondaryFiles:  # companion files that must sit next to the reference; presumably aligner/faidx indexes - confirm
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # leading caret: the primary file's extension is removed before .dict is appended
+  sample: string  # passed to bwamem-samtools-view, samtools-sort, haplotypecaller, and to generate-report as samplename
+  knownsites:  # wired to the haplotypecaller step's knownsites1 input
+    type: File
+    secondaryFiles:
+      - .tbi   # presumably a tabix index - confirm
+  scattercount: string  # NOTE(review): typed as string although the name suggests a count - confirm against scatter-gatk-wf-with-interval.cwl
+  clinvarvcf: File  # passed unchanged to generate-report
+  reportfunc: File  # passed unchanged to generate-report
+  headhtml: File  # passed unchanged to generate-report
+  tailhtml: File  # passed unchanged to generate-report
+
+outputs:
+  qc-html:  # out-html output of the fastqc step
+    type: File[]
+    outputSource: fastqc/out-html
+  qc-zip:  # out-zip output of the fastqc step
+    type: File[]
+    outputSource: fastqc/out-zip 
+  gvcf:  # gatheredgvcf output of the haplotypecaller step
+    type: File
+    outputSource: haplotypecaller/gatheredgvcf
+  report:  # report output of the generate-report step
+    type: File  
+    outputSource: generate-report/report
+steps:
+  fastqc:  # takes only the two fastq inputs; its outputs go straight to the workflow outputs
+    run: fastqc.cwl
+    in:
+      fastq1: fastq1
+      fastq2: fastq2
+    out: [out-html, out-zip]
+  bwamem-samtools-view:  # first step of the bam chain: fastq pair + reference + sample -> bam
+    run: bwamem-samtools-view.cwl
+    in:
+      fastq1: fastq1
+      fastq2: fastq2
+      reference: reference
+      sample: sample
+    out: [bam]
+  samtools-sort:  # consumes the bam produced by bwamem-samtools-view
+    run: samtools-sort.cwl 
+    in:
+      bam: bwamem-samtools-view/bam
+      sample: sample
+    out: [sortedbam]
+  mark-duplicates:  # consumes the sorted bam
+    run: mark-duplicates.cwl
+    in:
+      bam: samtools-sort/sortedbam
+    out: [dupbam,dupmetrics]  # dupmetrics is not consumed by any later step or workflow output in this file
+  samtools-index:  # consumes dupbam from mark-duplicates
+    run: samtools-index.cwl
+    in:
+      bam: mark-duplicates/dupbam
+    out: [indexedbam]
+  haplotypecaller:  # consumes the indexed bam; produces the gvcf exposed as a workflow output
+    run: scatter-gatk-wf-with-interval.cwl 
+    in:
+      reference: reference
+      bam: samtools-index/indexedbam
+      sample: sample
+      scattercount: scattercount
+      knownsites1: knownsites  # the step input is named knownsites1; the workflow input is knownsites
+    out: [gatheredgvcf]
+  generate-report:  # consumes the gathered gvcf plus the ClinVar/report/html inputs
+    run: report-wf.cwl
+    in:
+      gvcf: haplotypecaller/gatheredgvcf
+      samplename: sample  # the step input is named samplename; the workflow input is sample
+      clinvarvcf: clinvarvcf
+      reportfunc: reportfunc
+      headhtml: headhtml
+      tailhtml: tailhtml
+    out: [report]