Updating cwl and yml for intervals and other changes
author: Sarah Wait Zaranek <swz@curii.com>
Thu, 23 Jul 2020 13:09:38 +0000 (13:09 +0000)
committer: Sarah Wait Zaranek <swz@curii.com>
Thu, 23 Jul 2020 13:09:38 +0000 (13:09 +0000)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

WGS-processing/cwl/helper/bwamem-gatk-report-wf.cwl
WGS-processing/cwl/helper/gatk-splitintervals.cwl
WGS-processing/cwl/helper/getfastq.cwl
WGS-processing/cwl/helper/scatter-gatk-wf-with-interval.cwl
WGS-processing/cwl/wgs-processing-wf.cwl
WGS-processing/yml/helper/bwamem-gatk-report-wf.yml
WGS-processing/yml/helper/bwamem-samtools-view-test.yml [new file with mode: 0644]
WGS-processing/yml/helper/gatk-splitintervals.yml
WGS-processing/yml/helper/scatter-gatk-wf-with-interval.yml
WGS-processing/yml/wgs-processing-wf.yml

index 522bd822cdbd7178a9c1f8ee9028ca0a5cf86137..dbb1d5024731aa1923fa539217264e6a7070600c 100644 (file)
@@ -26,6 +26,9 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
+  fullintervallist:
+    type: File
+    label: Full list of intervals to operate over
   sample: 
     type: string
     label: Sample Name
   sample: 
     type: string
     label: Sample Name
@@ -110,6 +113,7 @@ steps:
     run: scatter-gatk-wf-with-interval.cwl 
     in:
       reference: reference
     run: scatter-gatk-wf-with-interval.cwl 
     in:
       reference: reference
+      fullintervallist: fullintervallist
       bam: samtools-index/indexedbam
       sample: sample
       scattercount: scattercount
       bam: samtools-index/indexedbam
       sample: sample
       scattercount: scattercount
index 98758d1e9e5474c4cb8e2cd7dd6aed3c9af12ee3..4909bc7412d0ebfcf64d12f652604d6a556a8da9 100644 (file)
@@ -32,6 +32,9 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
+  fullintervallist:
+    type: File
+    label: Full list of intervals to operate over
   sample:
     type: string
     label: Sample Name
   sample:
     type: string
     label: Sample Name
@@ -54,6 +57,8 @@ arguments:
   - SplitIntervals
   - prefix: "-R"
     valueFrom: $(inputs.reference)
   - SplitIntervals
   - prefix: "-R"
     valueFrom: $(inputs.reference)
+  - prefix: "-L"
+    valueFrom: $(inputs.fullintervallist)
   - prefix: "--scatter-count"
     valueFrom: $(inputs.scattercount)
   - prefix: "--subdivision-mode"
   - prefix: "--scatter-count"
     valueFrom: $(inputs.scattercount)
   - prefix: "--subdivision-mode"
index 035b4c3135695d3c1607e5d39b4e1f41fb8c6bc6..59c677112791346313f16a73c932fa66c8b6b8e5 100644 (file)
@@ -41,6 +41,7 @@ expression: |
     var fastq2 = [];
     for (var i = 0; i < inputs.fastqdir.listing.length; i++) {
       var name = inputs.fastqdir.listing[i];
     var fastq2 = [];
     for (var i = 0; i < inputs.fastqdir.listing.length; i++) {
       var name = inputs.fastqdir.listing[i];
+      name.format = 'http://edamontology.org/format_1930'
       if (name.basename.indexOf('_1.fastq.gz') != -1 ) {
         fastq1.push(name);
       }
       if (name.basename.indexOf('_1.fastq.gz') != -1 ) {
         fastq1.push(name);
       }
index 9752edb90f3efb83f3221ca22c9a33b402a050e7..80d7601115c8ef2cb1839e273266cdc1947ce4dd 100644 (file)
@@ -25,6 +25,9 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
+  fullintervallist:
+    type: File
+    label: Full list of intervals to operate over
   sample:
     type: string
     label: Sample Name
   sample:
     type: string
     label: Sample Name
@@ -52,6 +55,7 @@ steps:
     run: gatk-splitintervals.cwl
     in:
       reference: reference
     run: gatk-splitintervals.cwl
     in:
       reference: reference
+      fullintervallist: fullintervallist
       sample: sample
       scattercount: scattercount
     out: [intervalfiles]
       sample: sample
       scattercount: scattercount
     out: [intervalfiles]
index 7a5e6de34847a2686fdee4ef51f6dd60678d9d62..8cbcf5ee03b17f7d5800b611fd29a4d69cfcc105 100644 (file)
@@ -22,6 +22,8 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
+  fullintervallist:
+    type: File
   knownsites:
     type: File
     format: edam:format_3016 # VCF
   knownsites:
     type: File
     format: edam:format_3016 # VCF
@@ -74,6 +76,7 @@ steps:
       fastq1: getfastq/fastq1
       fastq2: getfastq/fastq2
       reference: reference
       fastq1: getfastq/fastq1
       fastq2: getfastq/fastq2
       reference: reference
+      fullintervallist: fullintervallist
       sample: getfastq/sample
       knownsites: knownsites
       scattercount: scattercount
       sample: getfastq/sample
       knownsites: knownsites
       scattercount: scattercount
index 5e5be2c355fdd3475dc6d423394a49b5c954c251..8a92726272133c31fd75e54cf977830d127584aa 100644 (file)
@@ -5,6 +5,10 @@ reference:
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
+fullintervallist:
+  class: File
+  location: /data-sdd/pgp/testdata/full-noalt-scattered.interval_list
+
 fastq1:
   class: File
   format: edam:format_1930
 fastq1:
   class: File
   format: edam:format_1930
diff --git a/WGS-processing/yml/helper/bwamem-samtools-view-test.yml b/WGS-processing/yml/helper/bwamem-samtools-view-test.yml
new file mode 100644 (file)
index 0000000..55b4787
--- /dev/null
@@ -0,0 +1,16 @@
+reference:
+  class: File
+  format: edam:format_1929 
+  location: keep:5eac1d1c03ff6404226421a8d8351cf1+6133/GRCh38_no_alt_plus_hs38d1_analysis_set.fna 
+
+fastq1:
+  class: File
+  format: edam:format_1930 
+  location: keep:24df1ea93ad5275aa511462ea85b0548+20283/WGC071838D_R1.fastq.gz
+
+fastq2:
+  class: File
+  format: edam:format_1930 
+  location: keep:24df1ea93ad5275aa511462ea85b0548+20283/WGC071838D_R2.fastq.gz
+
+sample: WGC071838D
index 6c1c24abc80525046304dfbd7356104d7c58fed0..e78265c2635bc6a6d229602efd29f58a3cf8dacb 100644 (file)
@@ -3,6 +3,11 @@ reference:
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
+fullintervallist:
+  class: File
+  location: /data-sdd/pgp/testdata/full-noalt-scattered.interval_list
+
 sample: ERR1726424
 
 scattercount: "32"
 sample: ERR1726424
 
 scattercount: "32"
+
index a6d741456fd98a7323e3bc80f12d9f4043f193c2..20b5a3b916449050ff08d56180db0b316a2b6b39 100644 (file)
@@ -8,6 +8,10 @@ reference:
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
   format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
+fullintervallist:
+  class: File
+  location: /data-sdd/pgp/testdata/full-noalt-scattered.interval_list
+
 sample: ERR1726424
 
 scattercount: '32'
 sample: ERR1726424
 
 scattercount: '32'
index f56f8cb3b2f506642e069c5da59c2a4b4df6f7c6..299a674eaa08056538c86d0745dabd6e80dea04f 100644 (file)
@@ -1,7 +1,11 @@
 reference:
   class: File
   format: edam:format_1929
 reference:
   class: File
   format: edam:format_1929
-  location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
+  location: keep:5eac1d1c03ff6404226421a8d8351cf1+6133/GRCh38_no_alt_plus_hs38d1_analysis_set.fna 
+
+fullintervallist:
+  class: File
+  location: /data-sdd/pgp/testdata/full-noalt-scattered.interval_list
 
 fastqdir:
   class: Directory
 
 fastqdir:
   class: Directory
@@ -26,7 +30,7 @@ tailhtml:
   format: edam:format_2331
   location: ../src/annotation/tail.html
 
   format: edam:format_2331
   location: ../src/annotation/tail.html
 
-scattercount: '32'
+scattercount: '20'
 
 knownsites:
   class: File
 
 knownsites:
   class: File