adding helper directory for structure, may reorg later
author Sarah Wait Zaranek <swz@curii.com>
Fri, 8 May 2020 07:42:30 +0000 (07:42 +0000)
committer Ward Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:40 +0000 (11:16 -0400)
adding cwl to do intervals and scattering
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

26 files changed:
cwl/helper/annotate-vcf.cwl [moved from cwl/annotate-vcf.cwl with 100% similarity]
cwl/helper/bwa-gatk-wf.cwl [moved from cwl/bwa-gatk-wf.cwl with 100% similarity]
cwl/helper/bwamem-samtools-sort.cwl [moved from cwl/bwamem-samtools-sort.cwl with 100% similarity]
cwl/helper/bwamem-samtools-view.cwl [moved from cwl/bwamem-samtools-view-org.cwl with 100% similarity]
cwl/helper/calc-coverage.cwl [moved from cwl/calc-coverage.cwl with 100% similarity]
cwl/helper/check-sam.cwl [moved from cwl/check-sam.cwl with 100% similarity]
cwl/helper/fastqc.cwl [moved from cwl/fastqc.cwl with 100% similarity]
cwl/helper/fix-groups.cwl [moved from cwl/fix-groups.cwl with 100% similarity]
cwl/helper/gatk-applyBQSR.cwl [new file with mode: 0644]
cwl/helper/gatk-applyBSQR-with-interval.cwl [new file with mode: 0644]
cwl/helper/gatk-baserecalibrator-with-interval.cwl [new file with mode: 0644]
cwl/helper/gatk-baserecalibrator.cwl [moved from cwl/gatk-baserecalibrator.cwl with 97% similarity]
cwl/helper/gatk-haplotypecaller-with-interval.cwl [new file with mode: 0644]
cwl/helper/gatk-haplotypecaller.cwl [moved from cwl/gatk-haplotypecaller.cwl with 100% similarity]
cwl/helper/gatk-splitintervals.cwl [new file with mode: 0644]
cwl/helper/gatk-wf-with-interval.cwl [new file with mode: 0644]
cwl/helper/generate-report.cwl [moved from cwl/generate-report.cwl with 100% similarity]
cwl/helper/gvcf-to-vcf.cwl [moved from cwl/gvcf-to-vcf.cwl with 100% similarity]
cwl/helper/mark-duplicates.cwl [moved from cwl/mark-duplicates.cwl with 100% similarity]
cwl/helper/old/bwa-gatk-wf.cwl [moved from cwl/old/bwa-gatk-wf.cwl with 100% similarity]
cwl/helper/old/samtools-sort.cwl [moved from cwl/old/samtools-sort.cwl with 100% similarity]
cwl/helper/report-wf.cwl [moved from cwl/report-wf.cwl with 100% similarity]
cwl/helper/samtools-fixmate.cwl [moved from cwl/samtools-fixmate.cwl with 100% similarity]
cwl/helper/samtools-index.cwl [moved from cwl/samtools-index.cwl with 94% similarity]
cwl/helper/samtools-sort.cwl [moved from cwl/samtools-sort.cwl with 95% similarity]
cwl/helper/scatter-gatk-wf-with-interval.cwl [new file with mode: 0644]

similarity index 100%
rename from cwl/check-sam.cwl
rename to cwl/helper/check-sam.cwl
similarity index 100%
rename from cwl/fastqc.cwl
rename to cwl/helper/fastqc.cwl
similarity index 100%
rename from cwl/fix-groups.cwl
rename to cwl/helper/fix-groups.cwl
diff --git a/cwl/helper/gatk-applyBQSR.cwl b/cwl/helper/gatk-applyBQSR.cwl
new file mode 100644 (file)
index 0000000..c7f06f0
--- /dev/null
@@ -0,0 +1,60 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Applying base quality score recalibration 
+# Runs `gatk ApplyBQSR` on `bam` with `recaltable`; writes <sample>nodups_BQSR.bam.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this tool
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)  # stage the BAM in the working dir so -I can refer to it by basename
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # MiB (CWL unit); leaves headroom above the -Xmx4G heap set in arguments
+    coresMin: 2
+
+inputs:
+  bam:  # no secondaryFiles declared here, unlike the -with-interval variant
+    type: File
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string  # used only as the prefix of the output file name
+  recaltable:   # passed to --bqsr-recal-file
+    type: File
+
+outputs:
+  recalbam:
+    type: File
+    outputBinding:
+      glob: "*nodups_BQSR.bam"  # matches the -O file name built in arguments
+
+baseCommand: /gatk/gatk
+
+arguments:  # emitted in this order after baseCommand
+  - "--java-options"
+  - "-Xmx4G"
+  - ApplyBQSR
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)  # relative name; relies on the InitialWorkDirRequirement staging
+  - prefix: "--bqsr-recal-file"
+    valueFrom: $(inputs.recaltable)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)nodups_BQSR.bam  # no separator between sample and suffix
diff --git a/cwl/helper/gatk-applyBSQR-with-interval.cwl b/cwl/helper/gatk-applyBSQR-with-interval.cwl
new file mode 100644 (file)
index 0000000..41cfd2e
--- /dev/null
@@ -0,0 +1,68 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Applying base quality score recalibration 
+# Runs `gatk ApplyBQSR` restricted to `intervallist` (-L); writes <sample>nodups_BQSR.bam.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this tool
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)  # stage the BAM in the working dir so -I can refer to it by basename
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # MiB (CWL unit); leaves headroom above the -Xmx4G heap set in arguments
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai  # index expected as <bam name>.bai
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string  # used only as the prefix of the output file name
+  recaltable:   # passed to --bqsr-recal-file
+    type: File
+  intervallist:  # passed to -L
+    type: File
+
+outputs:
+  recalbam:
+    type: File
+    secondaryFiles: 
+      - .bai  # NOTE(review): confirm GATK names the index <out>.bam.bai; if it writes <out>.bai this needs ^.bai
+    outputBinding:
+      glob: "*nodups_BQSR.bam"  # matches the -O file name built in arguments
+
+baseCommand: /gatk/gatk
+
+arguments:  # emitted in this order after baseCommand
+  - "--java-options"
+  - "-Xmx4G"
+  - ApplyBQSR
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)  # relative name; relies on the InitialWorkDirRequirement staging
+  - prefix: "--bqsr-recal-file"
+    valueFrom: $(inputs.recaltable)
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)nodups_BQSR.bam  # no separator between sample and suffix
diff --git a/cwl/helper/gatk-baserecalibrator-with-interval.cwl b/cwl/helper/gatk-baserecalibrator-with-interval.cwl
new file mode 100644 (file)
index 0000000..dec3244
--- /dev/null
@@ -0,0 +1,68 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Generating recalibration table for BQSR 
+# Runs `gatk BaseRecalibrator` restricted to `intervallist` (-L); writes <sample>_recal_data.table.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this tool
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)  # stage the BAM in the working dir so -I can refer to it by basename
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # MiB (CWL unit); leaves headroom above the -Xmx4G heap set in arguments
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+     - .bai
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string  # used only as the prefix of the output table name
+  knownsites1:   # passed to --known-sites
+    type: File
+    secondaryFiles:
+      - .tbi
+  intervallist:  # passed to -L
+    type: File
+
+outputs:
+  recaltable:
+    type: File
+    outputBinding:
+      glob: "*.table"  # matches the -O file name built in arguments
+
+baseCommand: /gatk/gatk
+
+arguments:  # emitted in this order after baseCommand
+  - "--java-options"
+  - "-Xmx4G"
+  - BaseRecalibrator
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)  # relative name; relies on the InitialWorkDirRequirement staging
+  - prefix: "--known-sites"
+    valueFrom: $(inputs.knownsites1)
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)_recal_data.table
similarity index 97%
rename from cwl/gatk-baserecalibrator.cwl
rename to cwl/helper/gatk-baserecalibrator.cwl
index a5105e8a692924741573e54c09c05d82415a9cf7..7a788ba0dd53e31123b4f7136857071a341887a8 100644 (file)
@@ -24,6 +24,8 @@ hints:
 inputs:
   bam:
     type: File
+    secondaryFiles:
+      - .bai
   reference:
     type: File
     secondaryFiles:
diff --git a/cwl/helper/gatk-haplotypecaller-with-interval.cwl b/cwl/helper/gatk-haplotypecaller-with-interval.cwl
new file mode 100644 (file)
index 0000000..e66c5b5
--- /dev/null
@@ -0,0 +1,67 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Germline variant calling using GATK with output gvcf
+# Runs `gatk HaplotypeCaller` in GVCF mode restricted to `intervallist` (-L); writes <sample>.gatk.g.vcf.gz.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this tool
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # MiB; must exceed the -Xmx4G heap below (was 3500, i.e. less than the heap)
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai  # index expected as <bam name>.bai
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  intervallist:  # passed to -L
+    type: File
+  sample: string  # used only as the prefix of the output file name
+
+outputs:
+  gvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"  # matches the -O file name built in arguments
+
+baseCommand: /gatk/gatk
+
+arguments:  # emitted in this order after baseCommand
+  - "--java-options"
+  - "-Xmx4G"
+  - HaplotypeCaller
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam)  # full input path; this tool does not stage the BAM into the work dir
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(runtime.outdir)/$(inputs.sample).gatk.g.vcf.gz
+  - prefix: "-ERC"
+    valueFrom: "GVCF"
+  - prefix: "-GQB"  # -GQB is passed three times: 5, 20, 60
+    valueFrom: "5"
+  - prefix: "-GQB"
+    valueFrom: "20"
+  - prefix: "-GQB"
+    valueFrom: "60"
diff --git a/cwl/helper/gatk-splitintervals.cwl b/cwl/helper/gatk-splitintervals.cwl
new file mode 100644 (file)
index 0000000..de18380
--- /dev/null
@@ -0,0 +1,54 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Creating interval files for scattering
+# Runs `gatk SplitIntervals` on the reference; collects intervalfiles/*.interval_list as a File array.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this tool
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000  # MiB (CWL unit); leaves headroom above the -Xmx4G heap set in arguments
+    coresMin: 2
+
+inputs:
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string  # declared but not used by any argument below
+  scattercount: string  # forwarded verbatim to --scatter-count (typed as string, not int)
+outputs:
+  intervalfiles:
+    type: File[]
+    outputBinding:
+      glob: "intervalfiles/*.interval_list"  # directory name must match the -O value below
+
+baseCommand: /gatk/gatk
+
+arguments:  # emitted in this order after baseCommand
+  - "--java-options"
+  - "-Xmx4G"
+  - SplitIntervals
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "--scatter-count"
+    valueFrom: $(inputs.scattercount)
+  - prefix: "--subdivision-mode"
+    valueFrom: "BALANCING_WITHOUT_INTERVAL_SUBDIVISION"
+  - prefix: "-O"
+    valueFrom: "intervalfiles"  # relative output directory; NOTE(review): confirm GATK creates it if missing
diff --git a/cwl/helper/gatk-wf-with-interval.cwl b/cwl/helper/gatk-wf-with-interval.cwl
new file mode 100644 (file)
index 0000000..037accd
--- /dev/null
@@ -0,0 +1,61 @@
+cwlVersion: v1.1
+class: Workflow
+# Per-interval pipeline: BaseRecalibrator -> ApplyBQSR -> HaplotypeCaller, all limited to `intervallist`.
+requirements:
+  - class: SubworkflowFeatureRequirement  # NOTE(review): the three steps below run tool files, not workflows
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string
+  knownsites1:
+    type: File
+    secondaryFiles:
+      - .tbi
+  intervallist:  # forwarded unchanged to every step
+    type: File
+
+outputs:
+  gvcf:
+    type: File
+    outputSource: haplotypecaller/gvcf
+
+steps:
+  basecalibrator:  # produces the recalibration table consumed by applyBQSR
+    run: gatk-baserecalibrator-with-interval.cwl
+    in:
+      bam: bam
+      reference: reference
+      sample: sample
+      knownsites1: knownsites1
+      intervallist: intervallist
+    out: [recaltable]
+  applyBQSR:  # applies the table to the original input bam
+    run: gatk-applyBSQR-with-interval.cwl
+    in: 
+      reference: reference
+      bam: bam
+      sample: sample
+      intervallist: intervallist
+      recaltable: basecalibrator/recaltable
+    out: [recalbam]
+  haplotypecaller:  # calls variants on the recalibrated bam from applyBQSR
+    run: gatk-haplotypecaller-with-interval.cwl
+    in:
+      reference: reference
+      bam: applyBQSR/recalbam
+      sample: sample
+      intervallist: intervallist
+    out: [gvcf]
similarity index 100%
rename from cwl/report-wf.cwl
rename to cwl/helper/report-wf.cwl
similarity index 94%
rename from cwl/samtools-index.cwl
rename to cwl/helper/samtools-index.cwl
index 3876f3d2f233e80b19664a42b69c58bfd0eda2a5..605390a9c4032972439f287e53346521b57959fe 100644 (file)
@@ -1,5 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
+label: Indexing Bam File
 
 $namespaces:
   arv: "http://arvados.org/cwl#"
@@ -20,7 +21,7 @@ inputs:
   bam: File
 
 outputs:
-  out:
+  bam:
     type: File
     outputBinding:
       glob: "*bam"
similarity index 95%
rename from cwl/samtools-sort.cwl
rename to cwl/helper/samtools-sort.cwl
index c94d681a99aa5b73ef7b29142c99c47ec19746e7..b583f66be91d2897efb58f3da911793e9cd3c4c7 100644 (file)
@@ -1,5 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
+label: Sorting Bam file
 
 $namespaces:
   arv: "http://arvados.org/cwl#"
@@ -26,7 +27,7 @@ inputs:
   sample: string
 
 outputs:
-  out:
+  bam:
     type: File
     outputBinding:
       glob: "*sorted.bam"
diff --git a/cwl/helper/scatter-gatk-wf-with-interval.cwl b/cwl/helper/scatter-gatk-wf-with-interval.cwl
new file mode 100644 (file)
index 0000000..80d0456
--- /dev/null
@@ -0,0 +1,56 @@
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"  # declared but not referenced in this workflow
+cwlVersion: v1.1
+class: Workflow
+# Splits the reference into interval files, then scatters gatk-wf-with-interval.cwl over them.
+requirements:
+  - class: SubworkflowFeatureRequirement  # recal-haplotypecaller runs a Workflow
+  - class: ScatterFeatureRequirement  # recal-haplotypecaller scatters over intervallist
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:  # index files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret strips the reference's last extension before appending .dict
+  sample: string
+  knownsites1:
+    type: File
+    secondaryFiles:
+      - .tbi
+  scattercount: string  # forwarded to the splitintervals step
+
+outputs:
+  gvcf:
+    type: File[]  # array because the producing step is scattered
+    outputSource: recal-haplotypecaller/gvcf
+
+steps:
+  splitintervals:
+    run: gatk-splitintervals.cwl
+    in:
+      reference: reference
+      sample: sample
+      scattercount: scattercount
+    out: [intervalfiles]
+      
+  recal-haplotypecaller: 
+    run: gatk-wf-with-interval.cwl
+    scatter: intervallist  # one sub-workflow run per file in splitintervals/intervalfiles
+    in:
+      bam: bam
+      reference: reference
+      sample: sample
+      knownsites1: knownsites1
+      intervallist: splitintervals/intervalfiles
+    out: [gvcf]