From 743a3472af12e0a6c5f23667216411f79135ef37 Mon Sep 17 00:00:00 2001 From: Sarah Wait Zaranek Date: Thu, 28 May 2020 01:39:28 +0000 Subject: [PATCH] Adding files for gathering gvcfs and cleaning gvcfs Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek no issue # --- cwl/helper/gather-vcf-try2.cwl | 29 +++++----- cwl/helper/gather-vcf.cwl | 13 ++--- .../gatk-haplotypecaller-with-interval.cwl | 2 +- cwl/helper/gatk-selectvariants.cwl | 56 +++++++++++++++++++ src/variantcalling/checksex.sh | 10 ++++ src/variantcalling/sample-files.list | 4 ++ yml/gather-vcf2.yml | 2 +- yml/gatk-selectvariants.yml | 9 +++ 8 files changed, 101 insertions(+), 24 deletions(-) create mode 100644 cwl/helper/gatk-selectvariants.cwl create mode 100644 src/variantcalling/checksex.sh create mode 100644 src/variantcalling/sample-files.list create mode 100644 yml/gatk-selectvariants.yml diff --git a/cwl/helper/gather-vcf-try2.cwl b/cwl/helper/gather-vcf-try2.cwl index 4bd8cf0..c573ffb 100644 --- a/cwl/helper/gather-vcf-try2.cwl +++ b/cwl/helper/gather-vcf-try2.cwl @@ -9,10 +9,7 @@ requirements: DockerRequirement: dockerPull: broadinstitute/gatk:4.1.7.0 ShellCommandRequirement: {} - InitialWorkDirRequirement: - listing: - - $(inputs.gvcf1) - - $(inputs.gvcf2) + InlineJavascriptRequirement: {} hints: ResourceRequirement: @@ -22,10 +19,7 @@ hints: outputDirType: keep_output_dir inputs: - gvcf1: - type: File - gvcf2: - type: File + gvcfarray: File[] sample: string reference: type: File @@ -41,17 +35,22 @@ outputs: gatheredgvcf: type: File outputBinding: - glob: "*g.vcf.gz" + glob: "*.g.vcf.gz" baseCommand: /gatk/gatk arguments: - "--java-options" - "-Xmx8G" - - MergeVcfs - - prefix: "-I" - valueFrom: $(inputs.gvcf1.basename) - - prefix: "-I" - valueFrom: $(inputs.gvcf2.basename) + - GatherVcfs + - shellQuote: false + valueFrom: > + ${ + var cmd ""; + for( var i = 0; i < inputs.gvcfarray.length; i++){ + cmd += "\s echo " + "-I" + "\s" + inputs.gvcfsarray[i] + } + return cmd; + } - prefix: "-O" - valueFrom: $(inputs.sample)g.vcf.gz + valueFrom: $(inputs.sample).g.vcf.gz diff --git a/cwl/helper/gather-vcf.cwl b/cwl/helper/gather-vcf.cwl index 7bba8a9..8e5b29a 100644 --- a/cwl/helper/gather-vcf.cwl +++ b/cwl/helper/gather-vcf.cwl @@ -11,7 +11,6 @@ requirements: ShellCommandRequirement: {} InitialWorkDirRequirement: listing: - - $(inputs.gvcflist) - $(inputs.gvcf1) - $(inputs.gvcf2) @@ -27,8 +26,6 @@ inputs: type: File gvcf2: type: File - gvcflist: - type: File sample: string reference: type: File @@ -44,7 +41,7 @@ outputs: gatheredgvcf: type: File outputBinding: - glob: "*g.vcf.gz" + glob: "*.g.vcf.gz" baseCommand: /gatk/gatk @@ -52,7 +49,9 @@ arguments: - "--java-options" - "-Xmx8G" - GatherVcfs - - prefix: "-I" - valueFrom: $(inputs.gvcflist.basename) + - "-I" + - $(inputs.gvcf1.basename) + - "-I" + - $(inputs.gvcf2.basename) - prefix: "-O" - valueFrom: $(inputs.sample)g.vcf.gz + valueFrom: $(inputs.sample).g.vcf.gz diff --git a/cwl/helper/gatk-haplotypecaller-with-interval.cwl b/cwl/helper/gatk-haplotypecaller-with-interval.cwl index e66c5b5..3e0b53d 100644 --- a/cwl/helper/gatk-haplotypecaller-with-interval.cwl +++ b/cwl/helper/gatk-haplotypecaller-with-interval.cwl @@ -56,7 +56,7 @@ arguments: - prefix: "-L" valueFrom: $(inputs.intervallist) - prefix: "-O" - valueFrom: $(runtime.outdir)/$(inputs.sample).gatk.g.vcf.gz + valueFrom: $(runtime.outdir)/$(inputs.sample)-$(inputs.intervallist.nameroot).gatk.g.vcf.gz - prefix: "-ERC" valueFrom: "GVCF" - prefix: "-GQB" diff --git a/cwl/helper/gatk-selectvariants.cwl b/cwl/helper/gatk-selectvariants.cwl new file mode 100644 index 0000000..2a65e53 --- /dev/null +++ b/cwl/helper/gatk-selectvariants.cwl @@ -0,0 +1,56 @@ +cwlVersion: v1.1 +class: CommandLineTool + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" + +requirements: + DockerRequirement: + dockerPull: broadinstitute/gatk:4.1.7.0 + +hints: + arv:RuntimeConstraints: + outputDirType: keep_output_dir + keep_cache: 1024 + ResourceRequirement: + ramMin: 5000 + coresMin: 2 + +inputs: + gvcf: + type: File + secondaryFiles: + - .tbi + reference: + type: File + secondaryFiles: + - .amb + - .ann + - .bwt + - .pac + - .sa + - .fai + - ^.dict + sample: string + +outputs: + genotypegvcf: + type: File + outputBinding: + glob: "*selected.g.vcf.gz" + +baseCommand: /gatk/gatk + +arguments: + - "--java-options" + - "-Xmx4G" + - SelectVariants + - prefix: "-R" + valueFrom: $(inputs.reference) + - prefix: "--remove-unused-alternates" + valueFrom: "true" + - prefix: "-V" + valueFrom: $(inputs.gvcf) + - prefix: "-O" + valueFrom: $(inputs.sample)selected.g.vcf.gz diff --git a/src/variantcalling/checksex.sh b/src/variantcalling/checksex.sh new file mode 100644 index 0000000..407111e --- /dev/null +++ b/src/variantcalling/checksex.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +xcov=$(echo "scale=4; $(samtools idxstats $1 | grep 'chrX[^_]' | cut -f 3)/$(samtools idxstats $1 | grep 'chrX[^_]' | cut -f 2)" | bc) +echo $xcov +ycov=$(echo "scale=4; $(samtools idxstats $1 | grep "chrY[^_]" | cut -f 3)/$(samtools idxstats $1 | grep "chrY[^_]" | cut -f 2)" | bc) +echo $ycov + +rat=$(echo "scale=4; ${xcov}/${ycov}" | bc) + +echo $rat diff --git a/src/variantcalling/sample-files.list b/src/variantcalling/sample-files.list new file mode 100644 index 0000000..d290f5b --- /dev/null +++ b/src/variantcalling/sample-files.list @@ -0,0 +1,4 @@ +ERR1726424.gatk.g.vcf.gz +ERR1726424.gatk.g.vcf_2.gz + +~ diff --git a/yml/gather-vcf2.yml b/yml/gather-vcf2.yml index ce9852f..5f4e324 100644 --- a/yml/gather-vcf2.yml +++ b/yml/gather-vcf2.yml @@ -8,6 +8,6 @@ gvcf1: gvcf2: class: File - location: keep:dec5da8ebcda12b3eece8705a4391f55+2510/ERR1726424.gatk.g.vcf_10.gz + location: keep:054cb90cfb20dedb862af1fdd636ad3f+297/ERR1726424_10.gatk.g.vcf.gz diff --git a/yml/gatk-selectvariants.yml b/yml/gatk-selectvariants.yml new file mode 100644 index 0000000..972459b --- /dev/null +++ b/yml/gatk-selectvariants.yml @@ -0,0 +1,9 @@ +gvcf: + class: File + location: keep:3b34d300671c8dc8d1b2327f6ddab140+587/ERR1726424.g.vcf.gz + +reference: + class: File + location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa + +sample: ERR1726424 -- 2.30.2