Add scatter wf for overall scatter
author Sarah Wait Zaranek <swz@curii.com>
Wed, 17 Jun 2020 17:13:54 +0000 (17:13 +0000)
committer Ward Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:40 +0000 (11:16 -0400)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

WGS-processing/cwl/getfastq.cwl [deleted file]
WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp [new file with mode: 0644]
WGS-processing/cwl/helper/bwamem-gatk-report-wf.cwl [moved from WGS-processing/cwl/helper/bwa-gatk-report-wf.cwl with 100% similarity]
WGS-processing/cwl/helper/not-in-use/getgvcfs.cwl [moved from WGS-processing/cwl/helper/getgvcfs.cwl with 100% similarity]
WGS-processing/cwl/wgs-processing-wf.cwl

diff --git a/WGS-processing/cwl/getfastq.cwl b/WGS-processing/cwl/getfastq.cwl
deleted file mode 100644 (file)
index a38bc91..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
-class: ExpressionTool
-cwlVersion: v1.1
-label: Create array of gvcfs to process
-requirements:
-  InlineJavascriptRequirement: {}
-inputs:
-  fastjdir:
-    type: Directory
-    label: Input directory of fastj
-    loadListing: 'shallow_listing' 
-outputs:
-  fastq1: 
-    type: File[]
-  fastq2:
-    type: File[]
-expression: |
-  ${function compare(a, b) {
-    var baseA = a.basename;
-    var baseB = b.basename;
-
-    var comparison = 0;
-    if (baseA > baseB) {
-    comparison = 1;
-    } else if (baseA < baseB) {
-    comparison = -1;
-    }
-    return comparison;
-    }
-
-    var fastq1 = [];
-    var fastq2 = [];
-    for (var i = 0; i < inputs.fastjdir.listing.length; i++) {
-      var name = inputs.fastjdir.listing[i];
-      if (name.basename.indexOf('_1.fastq.gz') != -1 ) {
-        fastq1.push(name);
-      }
-      if (name.basename.indexOf('_2.fastq.gz') != -1 ) {
-        fastq2.push(name);
-      }
-    }
-  
-    fastq1 = fastq1.sort(compare)
-    fastq2 = fastq2.sort(compare)
-    return {"fastq1": fastq1, "fastq2": fastq2};
-  }
diff --git a/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp b/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp
new file mode 100644 (file)
index 0000000..6973e8c
Binary files /dev/null and b/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp differ
diff --git a/WGS-processing/cwl/wgs-processing-wf.cwl b/WGS-processing/cwl/wgs-processing-wf.cwl
index 0a4b9d274d40a8b9ffe854b8d79f924671ac6953..f7d2d00d36d80c043515eb53f467dcb278c3b9fd 100644 (file)
@@ -3,10 +3,10 @@ class: Workflow
 
 requirements:
   - class: SubworkflowFeatureRequirement
 
 requirements:
   - class: SubworkflowFeatureRequirement
+  - class: ScatterFeatureRequirement
 
 inputs:
 
 inputs:
-  fastq1: File
-  fastq2: File
+  fastqdir: Directory 
   reference:
     type: File
     secondaryFiles:
   reference:
     type: File
     secondaryFiles:
@@ -29,65 +29,33 @@ inputs:
   tailhtml: File
 
 outputs:
   tailhtml: File
 
 outputs:
-  qc-html:
-    type: File[]
-    outputSource: fastqc/out-html
-  qc-zip:
-    type: File[]
-    outputSource: fastqc/out-zip 
   gvcf:
   gvcf:
-    type: File
-    outputSource: haplotypecaller/gatheredgvcf
+    type: File[]
+    outputSource: bwamem-gatk-report/gvcf
   report:
   report:
-    type: File  
-    outputSource: generate-report/report
+    type: File[]  
+    outputSource: bwamem-gatk-report/report
+
 steps:
 steps:
-  fastqc:
-    run: fastqc.cwl
-    in:
-      fastq1: fastq1
-      fastq2: fastq2
-    out: [out-html, out-zip]
-  bwamem-samtools-view:
-    run: bwamem-samtools-view.cwl
-    in:
-      fastq1: fastq1
-      fastq2: fastq2
-      reference: reference
-      sample: sample
-    out: [bam]
-  samtools-sort:
-    run: samtools-sort.cwl 
-    in:
-      bam: bwamem-samtools-view/bam
-      sample: sample
-    out: [sortedbam]
-  mark-duplicates:
-    run: mark-duplicates.cwl
+  getfastq:
+    run: ./helper/getfastq.cwl
     in:
     in:
-      bam: samtools-sort/sortedbam
-    out: [dupbam,dupmetrics]
-  samtools-index:
-    run: samtools-index.cwl
-    in:
-      bam: mark-duplicates/dupbam
-    out: [indexedbam]
-  haplotypecaller:
-    run: scatter-gatk-wf-with-interval.cwl 
+      fastqdir: fastqdir
+    out: [fastq1, fastq2]
+
+  bwamem-gatk-report:
+    run: ./helper/bwamem-gatk-report-wf.cwl
+    scatter: [fastq1, fastq2]
+    scatterMethod: dotproduct
     in:
     in:
+      fastq1: getfastq/fastq1
+      fastq2: getfastq/fastq2
       reference: reference
       reference: reference
-      bam: samtools-index/indexedbam
       sample: sample
       sample: sample
+      knownsites: knownsites
       scattercount: scattercount
       scattercount: scattercount
-      knownsites1: knownsites
-    out: [gatheredgvcf]
-  generate-report:
-    run: report-wf.cwl
-    in:
-      gvcf: haplotypecaller/gatheredgvcf
-      samplename: sample
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
       tailhtml: tailhtml
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
       tailhtml: tailhtml
-    out: [report]  
+    out: [qc-html,qc-zip,gvcf,report]