Adding files for gathering gvcfs and cleaning gvcfs
author Sarah Wait Zaranek <swz@curii.com>
Thu, 28 May 2020 01:39:28 +0000 (01:39 +0000)
committer Ward Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:40 +0000 (11:16 -0400)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

cwl/helper/gather-vcf-try2.cwl
cwl/helper/gather-vcf.cwl
cwl/helper/gatk-haplotypecaller-with-interval.cwl
cwl/helper/gatk-selectvariants.cwl [new file with mode: 0644]
src/variantcalling/checksex.sh [new file with mode: 0644]
src/variantcalling/sample-files.list [new file with mode: 0644]
yml/gather-vcf2.yml
yml/gatk-selectvariants.yml [new file with mode: 0644]

index 4bd8cf019910c7d3279e0505aee723630cd6632d..c573ffb95ed97e5450a3f78bfcbec0c4e42ce5ca 100644 (file)
@@ -9,10 +9,7 @@ requirements:
   DockerRequirement:
     dockerPull: broadinstitute/gatk:4.1.7.0
   ShellCommandRequirement: {}
   DockerRequirement:
     dockerPull: broadinstitute/gatk:4.1.7.0
   ShellCommandRequirement: {}
-  InitialWorkDirRequirement:
-    listing:
-      - $(inputs.gvcf1)
-      - $(inputs.gvcf2)
+  InlineJavascriptRequirement: {}
 
 hints:
   ResourceRequirement:
 
 hints:
   ResourceRequirement:
@@ -22,10 +19,7 @@ hints:
     outputDirType: keep_output_dir
 
 inputs:
     outputDirType: keep_output_dir
 
 inputs:
-  gvcf1:
-    type: File
-  gvcf2:
-    type: File
+  gvcfarray: File[]
   sample: string
   reference:
     type: File
   sample: string
   reference:
     type: File
@@ -41,17 +35,22 @@ outputs:
   gatheredgvcf:
     type: File
     outputBinding:
   gatheredgvcf:
     type: File
     outputBinding:
-      glob: "*g.vcf.gz"
+      glob: "*.g.vcf.gz"
 
 baseCommand: /gatk/gatk
 
 arguments:
   - "--java-options"
   - "-Xmx8G" 
 
 baseCommand: /gatk/gatk
 
 arguments:
   - "--java-options"
   - "-Xmx8G" 
-  - MergeVcfs
-  - prefix: "-I"
-    valueFrom: $(inputs.gvcf1.basename) 
-  - prefix: "-I"
-    valueFrom: $(inputs.gvcf2.basename)
+  - GatherVcfs  # followed by one "-I <path>" pair per entry of inputs.gvcfarray
+  - shellQuote: false  # emit the built string unquoted so the shell splits it into separate arguments (paths containing spaces would break)
+    valueFrom: >
+      ${
+        var cmd = "";
+        for (var i = 0; i < inputs.gvcfarray.length; i++) {
+          cmd += " -I " + inputs.gvcfarray[i].path;
+        }
+        return cmd;
+      }
   - prefix: "-O"
   - prefix: "-O"
-    valueFrom: $(inputs.sample)g.vcf.gz
+    valueFrom: $(inputs.sample).g.vcf.gz
index 7bba8a9a52c5bbab520c24557fd559f825b7968f..8e5b29a8c6063496f152e5533b49721f449f9a26 100644 (file)
@@ -11,7 +11,6 @@ requirements:
   ShellCommandRequirement: {}
   InitialWorkDirRequirement:
     listing:
   ShellCommandRequirement: {}
   InitialWorkDirRequirement:
     listing:
-      - $(inputs.gvcflist)
       - $(inputs.gvcf1)
       - $(inputs.gvcf2)
 
       - $(inputs.gvcf1)
       - $(inputs.gvcf2)
 
@@ -27,8 +26,6 @@ inputs:
     type: File
   gvcf2:
     type: File
     type: File
   gvcf2:
     type: File
-  gvcflist:
-    type: File
   sample: string
   reference:
     type: File
   sample: string
   reference:
     type: File
@@ -44,7 +41,7 @@ outputs:
   gatheredgvcf:
     type: File
     outputBinding:
   gatheredgvcf:
     type: File
     outputBinding:
-      glob: "*g.vcf.gz"
+      glob: "*.g.vcf.gz"
 
 baseCommand: /gatk/gatk
 
 
 baseCommand: /gatk/gatk
 
@@ -52,7 +49,9 @@ arguments:
   - "--java-options"
   - "-Xmx8G" 
   - GatherVcfs
   - "--java-options"
   - "-Xmx8G" 
   - GatherVcfs
-  - prefix: "-I"
-    valueFrom: $(inputs.gvcflist.basename) 
+  - "-I"
+  - $(inputs.gvcf1.basename) 
+  - "-I"
+  - $(inputs.gvcf2.basename)
   - prefix: "-O"
   - prefix: "-O"
-    valueFrom: $(inputs.sample)g.vcf.gz
+    valueFrom: $(inputs.sample).g.vcf.gz
index e66c5b5b37631f8613227399356b6fa679e8d9ba..3e0b53d3e53a2a22fb0f7e616b0a78c7316a2aca 100644 (file)
@@ -56,7 +56,7 @@ arguments:
   - prefix: "-L"
     valueFrom: $(inputs.intervallist)
   - prefix: "-O"
   - prefix: "-L"
     valueFrom: $(inputs.intervallist)
   - prefix: "-O"
-    valueFrom: $(runtime.outdir)/$(inputs.sample).gatk.g.vcf.gz
+    valueFrom: $(runtime.outdir)/$(inputs.sample)-$(inputs.intervallist.nameroot).gatk.g.vcf.gz
   - prefix: "-ERC"
     valueFrom: "GVCF"
   - prefix: "-GQB"
   - prefix: "-ERC"
     valueFrom: "GVCF"
   - prefix: "-GQB"
diff --git a/cwl/helper/gatk-selectvariants.cwl b/cwl/helper/gatk-selectvariants.cwl
new file mode 100644 (file)
index 0000000..2a65e53
--- /dev/null
@@ -0,0 +1,56 @@
+cwlVersion: v1.1
+class: CommandLineTool  # runs GATK SelectVariants with --remove-unused-alternates on a single gVCF
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # leaves headroom above the 4G Java heap requested in arguments
+    coresMin: 2
+
+inputs:
+  gvcf:
+    type: File
+    secondaryFiles:
+      - .tbi  # sidecar named <gvcf basename>.tbi must accompany the gVCF
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret drops the reference's own extension before appending .dict
+  sample: string  # used only to name the output file
+
+outputs:
+  genotypegvcf:
+    type: File
+    outputBinding:
+      glob: "*selected.g.vcf.gz"  # matches the -O name built from inputs.sample below
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - SelectVariants
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "--remove-unused-alternates"
+    valueFrom: "true"
+  - prefix: "-V"
+    valueFrom: $(inputs.gvcf)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample).selected.g.vcf.gz  # "." separator, consistent with the <sample>.g.vcf.gz naming of the gather-vcf tools
diff --git a/src/variantcalling/checksex.sh b/src/variantcalling/checksex.sh
new file mode 100644 (file)
index 0000000..407111e
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Usage: checksex.sh <alignment file>. Prints the chrX ratio, the chrY ratio, then chrX/chrY.
+stats=$(samtools idxstats "$1") || exit 1   # one idxstats pass instead of four; path quoted; stop if samtools fails
+# cov <contig>: field 3 / field 2 of the idxstats row(s) where <contig> is followed by a non-'_' character
+cov() { echo "scale=4; $(grep "$1[^_]" <<< "$stats" | cut -f 3)/$(grep "$1[^_]" <<< "$stats" | cut -f 2)" | bc; }
+xcov=$(cov chrX); echo "$xcov"
+ycov=$(cov chrY); echo "$ycov"
+
+rat=$(echo "scale=4; ${xcov}/${ycov}" | bc)   # NOTE(review): bc reports an error and rat stays empty when ycov is 0 or empty
+echo "$rat"
diff --git a/src/variantcalling/sample-files.list b/src/variantcalling/sample-files.list
new file mode 100644 (file)
index 0000000..d290f5b
--- /dev/null
@@ -0,0 +1,4 @@
+ERR1726424.gatk.g.vcf.gz
+ERR1726424.gatk.g.vcf_2.gz
+
+~                                                               
index ce9852f16636247d307d4eba339b20ce92c9925c..5f4e324c175d6c568a5357d708bf193e5142c1c3 100644 (file)
@@ -8,6 +8,6 @@ gvcf1:
 
 gvcf2:
   class: File
 
 gvcf2:
   class: File
-  location: keep:dec5da8ebcda12b3eece8705a4391f55+2510/ERR1726424.gatk.g.vcf_10.gz
+  location: keep:054cb90cfb20dedb862af1fdd636ad3f+297/ERR1726424_10.gatk.g.vcf.gz
 
 
 
 
diff --git a/yml/gatk-selectvariants.yml b/yml/gatk-selectvariants.yml
new file mode 100644 (file)
index 0000000..972459b
--- /dev/null
@@ -0,0 +1,9 @@
+gvcf:  # gVCF passed to SelectVariants via -V in cwl/helper/gatk-selectvariants.cwl
+  class: File
+  location: keep:3b34d300671c8dc8d1b2327f6ddab140+587/ERR1726424.g.vcf.gz 
+
+reference:  # reference passed via -R; the tool declares index/dict secondaryFiles for it
+  class: File
+  location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
+
+sample: ERR1726424  # prefix of the output file name