Updating yml for formats
author Sarah Wait Zaranek <swz@curii.com>
Tue, 7 Jul 2020 00:36:13 +0000 (00:36 +0000)
committer Sarah Wait Zaranek <swz@curii.com>
Tue, 7 Jul 2020 00:36:13 +0000 (00:36 +0000)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

19 files changed:
WGS-processing/cwl/helper/annotate-vcf.cwl
WGS-processing/cwl/helper/bwamem-gatk-report-wf.cwl
WGS-processing/cwl/helper/gather-array-vcf.cwl [new file with mode: 0644]
WGS-processing/cwl/helper/gather-vcf.cwl
WGS-processing/cwl/helper/gatk-wf-with-interval.cwl
WGS-processing/cwl/helper/gvcf-to-vcf.cwl
WGS-processing/cwl/helper/scatter-gatk-wf-with-interval.cwl
WGS-processing/cwl/wgs-processing-wf.cwl
WGS-processing/yml/helper/annotate-vcf.yml
WGS-processing/yml/helper/bwamem-gatk-report-wf.yml
WGS-processing/yml/helper/fastqc.yml
WGS-processing/yml/helper/gatk-applyBQSR-with-interval.yml
WGS-processing/yml/helper/gatk-haplotypecaller-with-interval.yml
WGS-processing/yml/helper/gatk-selectvariants.yml
WGS-processing/yml/helper/gatk-splitintervals.yml
WGS-processing/yml/helper/gatk-wf-with-interval.yml
WGS-processing/yml/helper/gvcf-to-vcf.yml
WGS-processing/yml/helper/mark-duplicates.yml
WGS-processing/yml/helper/samtools-index.yml

index bb4989e28b48aae7b6e346cbb3dc45e91d08110a..1e028c4adebba781620f27671f3e793575804561 100644 (file)
@@ -3,9 +3,9 @@ class: CommandLineTool
 label: Annotate and filter VCF
 
 requirements:
 label: Annotate and filter VCF
 
 requirements:
-  - class: DockerRequirement
+  DockerRequirement:
     dockerPull: curii/clinvar-report
     dockerPull: curii/clinvar-report
-  - class: ShellCommandRequirement
+  ShellCommandRequirement: {}
 
 hints:
   ResourceRequirement:
 
 hints:
   ResourceRequirement:
@@ -32,7 +32,6 @@ inputs:
 
 outputs:
   reporttxt: stdout
 
 outputs:
   reporttxt: stdout
-  label: Annotated text from VCF
 
 arguments:
   - bcftools
 
 arguments:
   - bcftools
index ee8ae4fdce5682340403ba4070a10fc52e1cb1f6..ea5b93b53224b3cdf7b79dfab25c8d8a7351657c 100644 (file)
@@ -3,7 +3,7 @@ class: Workflow
 label: WGS processing workflow for single sample
 
 requirements:
 label: WGS processing workflow for single sample
 
 requirements:
-  - class: SubworkflowFeatureRequirement
+  SubworkflowFeatureRequirement: {}
 
 inputs:
   fastq1:
 
 inputs:
   fastq1:
@@ -119,7 +119,7 @@ steps:
     run: report-wf.cwl
     in:
       gvcf: haplotypecaller/gatheredgvcf
     run: report-wf.cwl
     in:
       gvcf: haplotypecaller/gatheredgvcf
-      samplename: sample
+      sample: sample
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
       clinvarvcf: clinvarvcf
       reportfunc: reportfunc
       headhtml: headhtml
@@ -133,6 +133,6 @@ $namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/
 
  s: https://schema.org/
  edam: http://edamontology.org/
 
-$schemas:
- - https://schema.org/version/latest/schema.rdf
- - http://edamontology.org/EDAM_1.18.owl
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl
diff --git a/WGS-processing/cwl/helper/gather-array-vcf.cwl b/WGS-processing/cwl/helper/gather-array-vcf.cwl
new file mode 100644 (file)
index 0000000..1e7d705
--- /dev/null
@@ -0,0 +1,103 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Gather GVCFs
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  # Needed because one argument below is emitted with shellQuote: false.
+  ShellCommandRequirement: {}
+  # Needed for the ${...} JavaScript expression that builds the -I list.
+  InlineJavascriptRequirement: {}
+
+hints:
+  ResourceRequirement:
+    # Requested RAM is well above the 8G Java heap set in `arguments`.
+    ramMin: 20000
+    coresMin: 4
+  # Arvados-specific hint (prefix declared under $namespaces).
+  # NOTE(review): keep_output_dir presumably writes outputs straight to
+  # Keep; confirm against the Arvados CWL extension documentation.
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
+inputs:
+  # Per-interval GVCF files that this tool merges into one output.
+  gvcfarray:
+    type: File[]
+    format: edam:format_3016  # GVCF
+    label: GVCFs for given intervals
+  # Used as the stem of the output file name (<sample>.g.vcf.gz).
+  sample:
+    type: string
+    label: Sample Name
+  # NOTE(review): no entry in `arguments` references this input; confirm
+  # whether it is still required by callers before removing it.
+  reference:
+    type: File
+    format: edam:format_1929  # FASTA
+    label: Reference genome
+    # Companion files expected alongside the FASTA. A leading ^ strips the
+    # primary file's extension before appending (hg38.fa -> hg38.dict).
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+outputs:
+  # The single merged file written by the -O argument, plus its .tbi index.
+  gatheredgvcf:
+    type: File
+    format: edam:format_3016  # GVCF
+    label: Gathered GVCF
+    secondaryFiles:
+      - .tbi
+    outputBinding:
+      glob: "*.g.vcf.gz"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  # Options for the JVM started by the gatk launcher: 8G maximum heap.
+  - "--java-options"
+  - "-Xmx8G"
+  - MergeVcfs
+  # Builds the "-I <path> -I <path> ..." input list. shellQuote is off so
+  # the whole string is split by the shell into separate arguments.
+  - shellQuote: false
+    valueFrom: |
+      ${
+        // Order files by basename so the merge order is deterministic.
+        // NOTE(review): presumably the shard names sort in interval
+        // order; confirm against the step that produces them.
+        function byBasename(a, b) {
+          if (a.basename > b.basename) { return 1; }
+          if (a.basename < b.basename) { return -1; }
+          return 0;
+        }
+
+        // Sort a copy so the `inputs` object itself is left untouched.
+        var ordered = inputs.gvcfarray.slice().sort(byBasename);
+
+        var flags = "";
+        for (var k = 0; k < ordered.length; k++) {
+          // Only entries whose extension is ".gz" are passed to the tool.
+          if (ordered[k].nameext === '.gz') {
+            flags += "-I " + ordered[k].path + " ";
+          }
+        }
+        return flags;
+      }
+  # Output name; matched by the "*.g.vcf.gz" glob in `outputs`.
+  - prefix: "-O"
+    valueFrom: $(inputs.sample).g.vcf.gz
+
+# Provenance metadata, expressed with the schema.org prefix declared below.
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+# Prefixes used in this document: s: for the metadata fields above, edam:
+# for file formats, arv: for the Arvados runtime hint.
+# NOTE(review): the cwltool prefix is declared but not used in this file.
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+ arv: "http://arvados.org/cwl#"
+ cwltool: "http://commonwl.org/cwltool#"
+
+# NOTE(review): $schemas is deliberately left commented out here; the reason
+# is not recorded in this file.
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl
index b59fc351aac0919207ed59f29779ba339dd08c19..4f796691af6d65db4349e9c954fa1aaa7600dba0 100644 (file)
@@ -2,10 +2,6 @@ cwlVersion: v1.1
 class: CommandLineTool
 label: Gather GVCF using Picard 
 
 class: CommandLineTool
 label: Gather GVCF using Picard 
 
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
-
 requirements:
   DockerRequirement:
     dockerPull: broadinstitute/gatk:4.1.7.0
 requirements:
   DockerRequirement:
     dockerPull: broadinstitute/gatk:4.1.7.0
index a5d1993652859aacc0430f982f2b50851336d8ac..de4a648e8a90cd08a697203e205e931a95be8b1c 100644 (file)
@@ -3,7 +3,7 @@ class: Workflow
 label: Variant calling workflow for given interval
 
 requirements:
 label: Variant calling workflow for given interval
 
 requirements:
-  - class: SubworkflowFeatureRequirement
+  SubworkflowFeatureRequirement: {}
 
 inputs:
   bam:
 
 inputs:
   bam:
index 4df5fd8b246379dbb55e39e8e862db9cb6b63f65..7df5fe2eee41a0b08dde333102eb61726603ab03 100644 (file)
@@ -3,9 +3,9 @@ class: CommandLineTool
 label: Convert GVCF to VCF
 
 requirements:
 label: Convert GVCF to VCF
 
 requirements:
-  - class: DockerRequirement
+  DockerRequirement:
     dockerPull: curii/clinvar-report
     dockerPull: curii/clinvar-report
-  - class: ShellCommandRequirement
+  ShellCommandRequirement: {}
 
 hints:
   ResourceRequirement:
 
 hints:
   ResourceRequirement:
index 75c605b822bf07b8fca8e3e66edc4e152153c014..9752edb90f3efb83f3221ca22c9a33b402a050e7 100644 (file)
@@ -3,8 +3,8 @@ class: Workflow
 label: Scattered variant calling workflow
 
 requirements:
 label: Scattered variant calling workflow
 
 requirements:
-  - class: SubworkflowFeatureRequirement
-  - class: ScatterFeatureRequirement
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
 
 inputs:
   bam:
 
 inputs:
   bam:
index 474a5e308fcd7fc0a3a671d579baa59891fa92fa..9459a3f619ed86188911f97c5825b2be4ef751e6 100644 (file)
@@ -90,6 +90,6 @@ $namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/
 
  s: https://schema.org/
  edam: http://edamontology.org/
 
-$schemas:
- - https://schema.org/version/latest/schema.rdf
- - http://edamontology.org/EDAM_1.18.owl
+#$schemas:
+# - https://schema.org/version/latest/schema.rdf
+# - http://edamontology.org/EDAM_1.18.owl
index 74f0856be61a12f2107f0c9649c321e71b230bf1..e25dc7fe433a532edf3fb8b3300b1fddf1891482 100644 (file)
@@ -1,7 +1,9 @@
 vcf:
   class: File
 vcf:
   class: File
+  format: edam:format_3016
   location: keep:542f0989c5ee117e20d293ef82311192+114145/hu34D5B9_var-GS000015891-ASM.vcf.gz 
 
 clinvarvcf:
   class: File
   location: keep:542f0989c5ee117e20d293ef82311192+114145/hu34D5B9_var-GS000015891-ASM.vcf.gz 
 
 clinvarvcf:
   class: File
+  format: edam:format_3016
   location: keep:80aadf801bccd1e1d838b07d46d4de63+1712/37/clinvar.vcf.gz
   location: keep:80aadf801bccd1e1d838b07d46d4de63+1712/37/clinvar.vcf.gz
index 074d16a2aa0600825499d5047b729a1a4a196afe..5e5be2c355fdd3475dc6d423394a49b5c954c251 100644 (file)
@@ -2,18 +2,22 @@ sample: ERR1726424
 
 reference:
   class: File
 
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 fastq1:
   class: File
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 fastq1:
   class: File
+  format: edam:format_1930
   location: keep:c96565a96cd428fe92bf70e97488bd80+21857/ERR1726424_1.fastq/ERR1726424_1.fastq.gz 
 
 fastq2:
   class: File
   location: keep:c96565a96cd428fe92bf70e97488bd80+21857/ERR1726424_1.fastq/ERR1726424_1.fastq.gz 
 
 fastq2:
   class: File
+  format: edam:format_1930
   location: keep:0ea6e3aaa6ead18a2cc4ba67a22939e9+25007/ERR1726424_2.fastq/ERR1726424_2.fastq.gz
 
 clinvarvcf:
   class: File
   location: keep:0ea6e3aaa6ead18a2cc4ba67a22939e9+25007/ERR1726424_2.fastq/ERR1726424_2.fastq.gz
 
 clinvarvcf:
   class: File
+  format: edam:format_3016
   location: keep:242ba3b9049aee86ab2c72db4f3b2822+223/38/clinvar.vcf.gz
 
 reportfunc:
   location: keep:242ba3b9049aee86ab2c72db4f3b2822+223/38/clinvar.vcf.gz
 
 reportfunc:
@@ -22,14 +26,17 @@ reportfunc:
 
 headhtml:
   class: File
 
 headhtml:
   class: File
+  format: edam:format_1964
   location: /data-sdd/pgp/arvados-tutorial/src/annotation/head.html
 
 tailhtml:
   class: File
   location: /data-sdd/pgp/arvados-tutorial/src/annotation/head.html
 
 tailhtml:
   class: File
+  format: edam:format_1964
   location: /data-sdd/pgp/arvados-tutorial/src/annotation/tail.html
 
 scattercount: '32'
 
 knownsites:
   class: File
   location: /data-sdd/pgp/arvados-tutorial/src/annotation/tail.html
 
 scattercount: '32'
 
 knownsites:
   class: File
+  format: edam:format_3016
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz
index 53f4d7e9250942096a406bad165379355a18eb80..36bf7e1e020f5376260aea9b5d4e54a01b4d5a51 100644 (file)
@@ -1,7 +1,9 @@
 fastq1:
   class: File
 fastq1:
   class: File
+  format: edam:format_1930
   location: keep:c96565a96cd428fe92bf70e97488bd80+21857/ERR1726424_1.fastq/ERR1726424_1.fastq.gz
 
 fastq2:
   class: File
   location: keep:c96565a96cd428fe92bf70e97488bd80+21857/ERR1726424_1.fastq/ERR1726424_1.fastq.gz
 
 fastq2:
   class: File
+  format: edam:format_1930
   location: keep:0ea6e3aaa6ead18a2cc4ba67a22939e9+25007/ERR1726424_2.fastq/ERR1726424_2.fastq.gz
   location: keep:0ea6e3aaa6ead18a2cc4ba67a22939e9+25007/ERR1726424_2.fastq/ERR1726424_2.fastq.gz
index 6259b09b9c075c788cc657ec7b7a0903e2eb6895..c992b4f2d539a629af038e8314259047135f3f7c 100644 (file)
@@ -1,9 +1,11 @@
 bam:
   class: File
 bam:
   class: File
+  format: edam:format_2572
   location: keep:79407f4f0d46a44fcc9c377461ee85fc+53752/marked_dupsERR1726424.sorted.bam
 
 reference:
   class: File
   location: keep:79407f4f0d46a44fcc9c377461ee85fc+53752/marked_dupsERR1726424.sorted.bam
 
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
index 4c42573c9bd6228e8b702a25e2fe0a5465fbcd56..b46c700adaad691a84f675f81a97c497320a9b93 100644 (file)
@@ -1,10 +1,15 @@
 bam:
   class: File
 bam:
   class: File
+  format: edam:format_2572
   location: keep:377c02de508dff9e4474f9a48d754933+53821/marked_dupsERR1726424.sorted.bam 
   location: keep:377c02de508dff9e4474f9a48d754933+53821/marked_dupsERR1726424.sorted.bam 
+
 reference:
   class: File
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
+
 intervallist:
   class: File
   location: keep:4d4947f6637f0acec32580e09d991967+815/0000-scattered.interval_list
 intervallist:
   class: File
   location: keep:4d4947f6637f0acec32580e09d991967+815/0000-scattered.interval_list
+
 sample: ERR1726424
 sample: ERR1726424
index 972459bdef77cc98f1b55039f60924e36b017770..49d918cde47b722089c2de7c97ddc179b0a34aed 100644 (file)
@@ -1,9 +1,11 @@
 gvcf:
   class: File
 gvcf:
   class: File
+  format: edam:format_3016
   location: keep:3b34d300671c8dc8d1b2327f6ddab140+587/ERR1726424.g.vcf.gz 
 
 reference:
   class: File
   location: keep:3b34d300671c8dc8d1b2327f6ddab140+587/ERR1726424.g.vcf.gz 
 
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
index 68dbcc4b4d1fb4cd4df9b4c08fc0b4bb35f26542..6c1c24abc80525046304dfbd7356104d7c58fed0 100644 (file)
@@ -1,5 +1,6 @@
 reference:
   class: File
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 sample: ERR1726424
index e1e2b942056cbd52b23e3bdca77937ddbcec3436..22a9af5d9f85b827bbd3db9024f21207e831f515 100644 (file)
@@ -1,9 +1,11 @@
 bam:
   class: File
 bam:
   class: File
+  format: edam:format_2572
   location: keep:377c02de508dff9e4474f9a48d754933+53821/marked_dupsERR1726424.sorted.bam 
 
 reference:
   class: File
   location: keep:377c02de508dff9e4474f9a48d754933+53821/marked_dupsERR1726424.sorted.bam 
 
 reference:
   class: File
+  format: edam:format_1929
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 intervallist:
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
 
 intervallist:
@@ -14,4 +16,5 @@ sample: ERR1726424
 
 knownsites:
   class: File
 
 knownsites:
   class: File
+  format: edam:format_3016
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz
index 270836eba6032414bd535a55257e7d7645337e11..36186d5e2312ab96b9b5945b915c0030ac7d9b0d 100644 (file)
@@ -1,5 +1,6 @@
 gvcf:
   class: File
 gvcf:
   class: File
+  format: edam:format_3016
   location: keep:02615a79bbe37b48629db97ab43147ae+1123/hu297A5D_AE2CH6SK4DG.gatk.g.vcf 
 
   location: keep:02615a79bbe37b48629db97ab43147ae+1123/hu297A5D_AE2CH6SK4DG.gatk.g.vcf 
 
-samplename: hu297A5D_AE2CH6SK4DG
+sample: hu297A5D_AE2CH6SK4DG
index 5e68b8891226ae00e822adb589144c7ac4d71fb7..cc5578241d1237510450eebdad4f092519bdab96 100644 (file)
@@ -1,3 +1,4 @@
 bam:
   class: File
 bam:
   class: File
+  format: edam:format_2572
   location: keep:713b093318f16861084d17a252eef710+12715/WGC069888D.bam
   location: keep:713b093318f16861084d17a252eef710+12715/WGC069888D.bam
index 72a01f7c3db391409ec09c4c5b157620770f11d1..3f774797bcbf162715ec1181ba47c88c66e412c1 100644 (file)
@@ -1,3 +1,4 @@
 bam:
   class: File
 bam:
   class: File
+  format: edam:format_2572
   location: keep:a0ca1f68836aac0bcefaed9cf5c63200+68/out.sorted.bam
   location: keep:a0ca1f68836aac0bcefaed9cf5c63200+68/out.sorted.bam