From cc2f0d302b6f536f680a8459b12ec9972c9507ff Mon Sep 17 00:00:00 2001
From: Sarah Wait Zaranek
Date: Fri, 8 May 2020 07:42:30 +0000
Subject: [PATCH] adding helper directory for structure, may reorg later
 adding cwl to do intervals and scattering

Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek

no issue #
---
Review notes (text in this section is ignored by `git am`):

  * Line structure and YAML indentation of this patch were restored; hunk
    line counts and the diffstat are unchanged.
  * gatk-applyBSQR-with-interval.cwl (output `recalbam`) and
    gatk-haplotypecaller-with-interval.cwl (input `bam`) now use the
    secondary-file pattern `^.bai` instead of `.bai`. GATK writes the index
    for `-O <name>.bam` as `<name>.bai`, so the `.bai` pattern (which
    resolves to `<name>.bam.bai`) never matched and the haplotypecaller
    step of gatk-wf-with-interval.cwl could not receive its required index.
  * The `index` lines of those two files still carry the blob ids of the
    originally submitted versions.

 cwl/{ => helper}/annotate-vcf.cwl            |  0
 cwl/{ => helper}/bwa-gatk-wf.cwl             |  0
 cwl/{ => helper}/bwamem-samtools-sort.cwl    |  0
 .../bwamem-samtools-view.cwl}                |  0
 cwl/{ => helper}/calc-coverage.cwl           |  0
 cwl/{ => helper}/check-sam.cwl               |  0
 cwl/{ => helper}/fastqc.cwl                  |  0
 cwl/{ => helper}/fix-groups.cwl              |  0
 cwl/helper/gatk-applyBQSR.cwl                | 60 ++++++++++++++++
 cwl/helper/gatk-applyBSQR-with-interval.cwl  | 68 +++++++++++++++++++
 .../gatk-baserecalibrator-with-interval.cwl  | 68 +++++++++++++++++++
 cwl/{ => helper}/gatk-baserecalibrator.cwl   |  2 +
 .../gatk-haplotypecaller-with-interval.cwl   | 67 ++++++++++++++++++
 cwl/{ => helper}/gatk-haplotypecaller.cwl    |  0
 cwl/helper/gatk-splitintervals.cwl           | 54 +++++++++++++++
 cwl/helper/gatk-wf-with-interval.cwl         | 61 +++++++++++++++++
 cwl/{ => helper}/generate-report.cwl         |  0
 cwl/{ => helper}/gvcf-to-vcf.cwl             |  0
 cwl/{ => helper}/mark-duplicates.cwl         |  0
 cwl/{ => helper}/old/bwa-gatk-wf.cwl         |  0
 cwl/{ => helper}/old/samtools-sort.cwl       |  0
 cwl/{ => helper}/report-wf.cwl               |  0
 cwl/{ => helper}/samtools-fixmate.cwl        |  0
 cwl/{ => helper}/samtools-index.cwl          |  3 +-
 cwl/{ => helper}/samtools-sort.cwl           |  3 +-
 cwl/helper/scatter-gatk-wf-with-interval.cwl | 56 +++++++++++++++
 26 files changed, 440 insertions(+), 2 deletions(-)
 rename cwl/{ => helper}/annotate-vcf.cwl (100%)
 rename cwl/{ => helper}/bwa-gatk-wf.cwl (100%)
 rename cwl/{ => helper}/bwamem-samtools-sort.cwl (100%)
 rename cwl/{bwamem-samtools-view-org.cwl => helper/bwamem-samtools-view.cwl} (100%)
 rename cwl/{ => helper}/calc-coverage.cwl (100%)
 rename cwl/{ => helper}/check-sam.cwl (100%)
 rename cwl/{ => helper}/fastqc.cwl (100%)
 rename cwl/{ => helper}/fix-groups.cwl (100%)
 create mode 100644 cwl/helper/gatk-applyBQSR.cwl
 create mode 100644 cwl/helper/gatk-applyBSQR-with-interval.cwl
 create mode 100644 cwl/helper/gatk-baserecalibrator-with-interval.cwl
 rename cwl/{ => helper}/gatk-baserecalibrator.cwl (97%)
 create mode 100644 cwl/helper/gatk-haplotypecaller-with-interval.cwl
 rename cwl/{ => helper}/gatk-haplotypecaller.cwl (100%)
 create mode 100644 cwl/helper/gatk-splitintervals.cwl
 create mode 100644 cwl/helper/gatk-wf-with-interval.cwl
 rename cwl/{ => helper}/generate-report.cwl (100%)
 rename cwl/{ => helper}/gvcf-to-vcf.cwl (100%)
 rename cwl/{ => helper}/mark-duplicates.cwl (100%)
 rename cwl/{ => helper}/old/bwa-gatk-wf.cwl (100%)
 rename cwl/{ => helper}/old/samtools-sort.cwl (100%)
 rename cwl/{ => helper}/report-wf.cwl (100%)
 rename cwl/{ => helper}/samtools-fixmate.cwl (100%)
 rename cwl/{ => helper}/samtools-index.cwl (94%)
 rename cwl/{ => helper}/samtools-sort.cwl (95%)
 create mode 100644 cwl/helper/scatter-gatk-wf-with-interval.cwl

diff --git a/cwl/annotate-vcf.cwl b/cwl/helper/annotate-vcf.cwl
similarity index 100%
rename from cwl/annotate-vcf.cwl
rename to cwl/helper/annotate-vcf.cwl
diff --git a/cwl/bwa-gatk-wf.cwl b/cwl/helper/bwa-gatk-wf.cwl
similarity index 100%
rename from cwl/bwa-gatk-wf.cwl
rename to cwl/helper/bwa-gatk-wf.cwl
diff --git a/cwl/bwamem-samtools-sort.cwl b/cwl/helper/bwamem-samtools-sort.cwl
similarity index 100%
rename from cwl/bwamem-samtools-sort.cwl
rename to cwl/helper/bwamem-samtools-sort.cwl
diff --git a/cwl/bwamem-samtools-view-org.cwl b/cwl/helper/bwamem-samtools-view.cwl
similarity index 100%
rename from cwl/bwamem-samtools-view-org.cwl
rename to cwl/helper/bwamem-samtools-view.cwl
diff --git a/cwl/calc-coverage.cwl b/cwl/helper/calc-coverage.cwl
similarity index 100%
rename from cwl/calc-coverage.cwl
rename to cwl/helper/calc-coverage.cwl
diff --git a/cwl/check-sam.cwl b/cwl/helper/check-sam.cwl
similarity index 100%
rename from cwl/check-sam.cwl
rename to cwl/helper/check-sam.cwl
diff --git a/cwl/fastqc.cwl b/cwl/helper/fastqc.cwl
similarity index 100%
rename from cwl/fastqc.cwl
rename to cwl/helper/fastqc.cwl
diff --git a/cwl/fix-groups.cwl b/cwl/helper/fix-groups.cwl
similarity index 100%
rename from cwl/fix-groups.cwl
rename to cwl/helper/fix-groups.cwl
diff --git a/cwl/helper/gatk-applyBQSR.cwl b/cwl/helper/gatk-applyBQSR.cwl
new file mode 100644
index 0000000..c7f06f0
--- /dev/null
+++ b/cwl/helper/gatk-applyBQSR.cwl
@@ -0,0 +1,60 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Applying base quality score recalibration
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  recaltable:
+    type: File
+
+outputs:
+  recalbam:
+    type: File
+    outputBinding:
+      glob: "*nodups_BQSR.bam"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - ApplyBQSR
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)
+  - prefix: "--bqsr-recal-file"
+    valueFrom: $(inputs.recaltable)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)nodups_BQSR.bam
diff --git a/cwl/helper/gatk-applyBSQR-with-interval.cwl b/cwl/helper/gatk-applyBSQR-with-interval.cwl
new file mode 100644
index 0000000..41cfd2e
--- /dev/null
+++ b/cwl/helper/gatk-applyBSQR-with-interval.cwl
@@ -0,0 +1,68 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Applying base quality score recalibration
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  recaltable:
+    type: File
+  intervallist:
+    type: File
+
+outputs:
+  recalbam:
+    type: File
+    secondaryFiles:
+      - ^.bai
+    outputBinding:
+      glob: "*nodups_BQSR.bam"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - ApplyBQSR
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)
+  - prefix: "--bqsr-recal-file"
+    valueFrom: $(inputs.recaltable)
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)nodups_BQSR.bam
diff --git a/cwl/helper/gatk-baserecalibrator-with-interval.cwl b/cwl/helper/gatk-baserecalibrator-with-interval.cwl
new file mode 100644
index 0000000..dec3244
--- /dev/null
+++ b/cwl/helper/gatk-baserecalibrator-with-interval.cwl
@@ -0,0 +1,68 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Generating recalibration table for BQSR
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  knownsites1:
+    type: File
+    secondaryFiles:
+      - .tbi
+  intervallist:
+    type: File
+
+outputs:
+  recaltable:
+    type: File
+    outputBinding:
+      glob: "*.table"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - BaseRecalibrator
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.basename)
+  - prefix: "--known-sites"
+    valueFrom: $(inputs.knownsites1)
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(inputs.sample)_recal_data.table
diff --git a/cwl/gatk-baserecalibrator.cwl b/cwl/helper/gatk-baserecalibrator.cwl
similarity index 97%
rename from cwl/gatk-baserecalibrator.cwl
rename to cwl/helper/gatk-baserecalibrator.cwl
index a5105e8..7a788ba 100644
--- a/cwl/gatk-baserecalibrator.cwl
+++ b/cwl/helper/gatk-baserecalibrator.cwl
@@ -24,6 +24,8 @@ hints:
 inputs:
   bam:
     type: File
+    secondaryFiles:
+      - .bai
   reference:
     type: File
     secondaryFiles:
diff --git a/cwl/helper/gatk-haplotypecaller-with-interval.cwl b/cwl/helper/gatk-haplotypecaller-with-interval.cwl
new file mode 100644
index 0000000..e66c5b5
--- /dev/null
+++ b/cwl/helper/gatk-haplotypecaller-with-interval.cwl
@@ -0,0 +1,67 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Germline variant calling using GATK with output gvcf
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 3500
+    coresMin: 2
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - ^.bai
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  intervallist:
+    type: File
+  sample: string
+
+outputs:
+  gvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - HaplotypeCaller
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "-I"
+    valueFrom: $(inputs.bam)
+  - prefix: "-L"
+    valueFrom: $(inputs.intervallist)
+  - prefix: "-O"
+    valueFrom: $(runtime.outdir)/$(inputs.sample).gatk.g.vcf.gz
+  - prefix: "-ERC"
+    valueFrom: "GVCF"
+  - prefix: "-GQB"
+    valueFrom: "5"
+  - prefix: "-GQB"
+    valueFrom: "20"
+  - prefix: "-GQB"
+    valueFrom: "60"
diff --git a/cwl/gatk-haplotypecaller.cwl b/cwl/helper/gatk-haplotypecaller.cwl
similarity index 100%
rename from cwl/gatk-haplotypecaller.cwl
rename to cwl/helper/gatk-haplotypecaller.cwl
diff --git a/cwl/helper/gatk-splitintervals.cwl b/cwl/helper/gatk-splitintervals.cwl
new file mode 100644
index 0000000..de18380
--- /dev/null
+++ b/cwl/helper/gatk-splitintervals.cwl
@@ -0,0 +1,54 @@
+cwlVersion: v1.1
+class: CommandLineTool
+label: Creating interval files for scattering
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk:4.1.7.0
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024
+  ResourceRequirement:
+    ramMin: 5000
+    coresMin: 2
+
+inputs:
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  scattercount: string
+
+outputs:
+  intervalfiles:
+    type: File[]
+    outputBinding:
+      glob: "intervalfiles/*.interval_list"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx4G"
+  - SplitIntervals
+  - prefix: "-R"
+    valueFrom: $(inputs.reference)
+  - prefix: "--scatter-count"
+    valueFrom: $(inputs.scattercount)
+  - prefix: "--subdivision-mode"
+    valueFrom: "BALANCING_WITHOUT_INTERVAL_SUBDIVISION"
+  - prefix: "-O"
+    valueFrom: "intervalfiles"
diff --git a/cwl/helper/gatk-wf-with-interval.cwl b/cwl/helper/gatk-wf-with-interval.cwl
new file mode 100644
index 0000000..037accd
--- /dev/null
+++ b/cwl/helper/gatk-wf-with-interval.cwl
@@ -0,0 +1,61 @@
+cwlVersion: v1.1
+class: Workflow
+
+requirements:
+  - class: SubworkflowFeatureRequirement
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  knownsites1:
+    type: File
+    secondaryFiles:
+      - .tbi
+  intervallist:
+    type: File
+
+outputs:
+  gvcf:
+    type: File
+    outputSource: haplotypecaller/gvcf
+
+steps:
+  basecalibrator:
+    run: gatk-baserecalibrator-with-interval.cwl
+    in:
+      bam: bam
+      reference: reference
+      sample: sample
+      knownsites1: knownsites1
+      intervallist: intervallist
+    out: [recaltable]
+  applyBQSR:
+    run: gatk-applyBSQR-with-interval.cwl
+    in:
+      reference: reference
+      bam: bam
+      sample: sample
+      intervallist: intervallist
+      recaltable: basecalibrator/recaltable
+    out: [recalbam]
+  haplotypecaller:
+    run: gatk-haplotypecaller-with-interval.cwl
+    in:
+      reference: reference
+      bam: applyBQSR/recalbam
+      sample: sample
+      intervallist: intervallist
+    out: [gvcf]
diff --git a/cwl/generate-report.cwl b/cwl/helper/generate-report.cwl
similarity index 100%
rename from cwl/generate-report.cwl
rename to cwl/helper/generate-report.cwl
diff --git a/cwl/gvcf-to-vcf.cwl b/cwl/helper/gvcf-to-vcf.cwl
similarity index 100%
rename from cwl/gvcf-to-vcf.cwl
rename to cwl/helper/gvcf-to-vcf.cwl
diff --git a/cwl/mark-duplicates.cwl b/cwl/helper/mark-duplicates.cwl
similarity index 100%
rename from cwl/mark-duplicates.cwl
rename to cwl/helper/mark-duplicates.cwl
diff --git a/cwl/old/bwa-gatk-wf.cwl b/cwl/helper/old/bwa-gatk-wf.cwl
similarity index 100%
rename from cwl/old/bwa-gatk-wf.cwl
rename to cwl/helper/old/bwa-gatk-wf.cwl
diff --git a/cwl/old/samtools-sort.cwl b/cwl/helper/old/samtools-sort.cwl
similarity index 100%
rename from cwl/old/samtools-sort.cwl
rename to cwl/helper/old/samtools-sort.cwl
diff --git a/cwl/report-wf.cwl b/cwl/helper/report-wf.cwl
similarity index 100%
rename from cwl/report-wf.cwl
rename to cwl/helper/report-wf.cwl
diff --git a/cwl/samtools-fixmate.cwl b/cwl/helper/samtools-fixmate.cwl
similarity index 100%
rename from cwl/samtools-fixmate.cwl
rename to cwl/helper/samtools-fixmate.cwl
diff --git a/cwl/samtools-index.cwl b/cwl/helper/samtools-index.cwl
similarity index 94%
rename from cwl/samtools-index.cwl
rename to cwl/helper/samtools-index.cwl
index 3876f3d..605390a 100644
--- a/cwl/samtools-index.cwl
+++ b/cwl/helper/samtools-index.cwl
@@ -1,5 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
+label: Indexing Bam File
 
 $namespaces:
   arv: "http://arvados.org/cwl#"
@@ -20,7 +21,7 @@ inputs:
   bam: File
 
 outputs:
-  out:
+  bam:
     type: File
     outputBinding:
       glob: "*bam"
diff --git a/cwl/samtools-sort.cwl b/cwl/helper/samtools-sort.cwl
similarity index 95%
rename from cwl/samtools-sort.cwl
rename to cwl/helper/samtools-sort.cwl
index c94d681..b583f66 100644
--- a/cwl/samtools-sort.cwl
+++ b/cwl/helper/samtools-sort.cwl
@@ -1,5 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
+label: Sorting Bam file
 
 $namespaces:
   arv: "http://arvados.org/cwl#"
@@ -26,7 +27,7 @@ inputs:
   sample: string
 
 outputs:
-  out:
+  bam:
     type: File
     outputBinding:
       glob: "*sorted.bam"
diff --git a/cwl/helper/scatter-gatk-wf-with-interval.cwl b/cwl/helper/scatter-gatk-wf-with-interval.cwl
new file mode 100644
index 0000000..80d0456
--- /dev/null
+++ b/cwl/helper/scatter-gatk-wf-with-interval.cwl
@@ -0,0 +1,56 @@
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.1
+class: Workflow
+
+requirements:
+  - class: SubworkflowFeatureRequirement
+  - class: ScatterFeatureRequirement
+
+inputs:
+  bam:
+    type: File
+    secondaryFiles:
+      - .bai
+  reference:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict
+  sample: string
+  knownsites1:
+    type: File
+    secondaryFiles:
+      - .tbi
+  scattercount: string
+
+outputs:
+  gvcf:
+    type: File[]
+    outputSource: recal-haplotypecaller/gvcf
+
+steps:
+  splitintervals:
+    run: gatk-splitintervals.cwl
+    in:
+      reference: reference
+      sample: sample
+      scattercount: scattercount
+    out: [intervalfiles]
+
+  recal-haplotypecaller:
+    run: gatk-wf-with-interval.cwl
+    scatter: intervallist
+    in:
+      bam: bam
+      reference: reference
+      sample: sample
+      knownsites1: knownsites1
+      intervallist: splitintervals/intervalfiles
+    out: [gvcf]
-- 
2.30.2