From d2f6d47da38e96de4597bb58ba1193e261af8fb1 Mon Sep 17 00:00:00 2001
From: Jiayong Li
Date: Tue, 15 Dec 2020 22:33:51 +0000
Subject: [PATCH] Add workflow to generate single chromosome fastqs

Arvados-DCO-1.1-Signed-off-by: Jiayong Li

refs #17049
---
 .../bam2singlechromfastq.cwl                  | 89 +++++++++++++++++++
 .../scatter-bam2singlechromfastq-wf.cwl       | 58 ++++++++++++
 .../yml/bam2singlechromfastq-test.yml         |  7 ++
 .../scatter-bam2singlechromfastq-wf-test.yml  | 24 +++++
 4 files changed, 178 insertions(+)
 create mode 100644 make-singlechromfastq/bam2singlechromfastq.cwl
 create mode 100644 make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl
 create mode 100644 make-singlechromfastq/yml/bam2singlechromfastq-test.yml
 create mode 100644 make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml

diff --git a/make-singlechromfastq/bam2singlechromfastq.cwl b/make-singlechromfastq/bam2singlechromfastq.cwl
new file mode 100644
index 0000000..5027631
--- /dev/null
+++ b/make-singlechromfastq/bam2singlechromfastq.cwl
@@ -0,0 +1,89 @@
+# Extract the reads aligned to one chromosome of an indexed BAM and write them
+# as a pair of FASTQ files, <sample>_1.fastq.gz and <sample>_2.fastq.gz.
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+  arv: "http://arvados.org/cwl#"
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+cwlVersion: v1.1
+class: CommandLineTool
+label: Convert one chromosome of bam to fastqs
+requirements:
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: curii/bwa-samtools
+  ResourceRequirement:
+    ramMin: 20000
+    coresMin: 4
+  arv:RuntimeConstraints:
+    keep_cache: 9216
+    outputDirType: keep_output_dir
+  SoftwareRequirement:
+    packages:
+      Samtools:
+        specs: [ "https://identifiers.org/rrid/RRID:SCR_002105" ]
+        version: [ "1.10" ]
+inputs:
+  bam:
+    type: File
+    #format: edam:format_2572 # BAM
+    label: Alignments in BAM format
+    # The .bai index is what lets `samtools view` pull out a single region.
+    secondaryFiles: [.bai]
+  sample:
+    type: string
+    label: Sample name
+  chrom:
+    type: string
+    label: Chromosome name
+outputs:
+  fastq1:
+    type: File
+    #format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R1)
+    outputBinding:
+      glob: "*_1.fastq.gz"
+  fastq2:
+    type: File
+    #format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R2)
+    outputBinding:
+      glob: "*_2.fastq.gz"
+# The arguments below are joined into one shell pipeline:
+#   samtools view -b <bam> <chrom>
+#     | samtools sort -n -
+#     | samtools fastq -@ <cores> - -N -0 /dev/null -s /dev/null
+#         -1 <sample>_1.fastq.gz -2 <sample>_2.fastq.gz
+# The name sort (-n) puts mates next to each other for samtools fastq; reads
+# not flagged as exactly one of R1/R2 (-0) and singletons (-s) go to /dev/null.
+# NOTE(review): the step's exit status is that of the last command only, so a
+# failure in `samtools view` or `samtools sort` may go unnoticed.
+baseCommand: [samtools, view]
+arguments:
+  - "-b"
+  - $(inputs.bam)
+  - $(inputs.chrom)
+  - shellQuote: false
+    valueFrom: "|"
+  - "samtools"
+  - "sort"
+  - "-n"
+  - "-"
+  - shellQuote: false
+    valueFrom: "|"
+  - "samtools"
+  - "fastq"
+  - "-@"
+  - $(runtime.cores)
+  - "-"
+  - "-N"
+  - prefix: "-0"
+    valueFrom: "/dev/null"
+  - prefix: "-s"
+    valueFrom: "/dev/null"
+  - prefix: "-1"
+    valueFrom: $(inputs.sample)_1.fastq.gz
+  - prefix: "-2"
+    valueFrom: $(inputs.sample)_2.fastq.gz
diff --git a/make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl b/make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl
new file mode 100644
index 0000000..9ef1133
--- /dev/null
+++ b/make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl
@@ -0,0 +1,58 @@
+# Scatter bam2singlechromfastq.cwl over parallel lists of BAMs and sample
+# names, extracting the same chromosome from each.
+$namespaces:
+  s: https://schema.org/
+  edam: http://edamontology.org/
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to convert one chromosome of bam to fastqs
+requirements:
+  ScatterFeatureRequirement: {}
+
+inputs:
+  bams:
+    type:
+      type: array
+      items: File
+    format: edam:format_2572 # BAM
+    secondaryFiles: [.bai]
+    label: Alignments in BAM format
+  samples:
+    type:
+      type: array
+      items: string
+    label: Sample names
+  chrom:
+    type: string
+    label: Chromosome name
+
+outputs:
+  fastq1s:
+    type:
+      type: array
+      items: File
+    format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R1)
+    outputSource: bam2singlechromfastq/fastq1
+  fastq2s:
+    type:
+      type: array
+      items: File
+    format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R2)
+    outputSource: bam2singlechromfastq/fastq2
+
+steps:
+  bam2singlechromfastq:
+    run: bam2singlechromfastq.cwl
+    # dotproduct pairs bams[i] with samples[i]; both lists must be the same
+    # length and in the same order.
+    scatter: [bam, sample]
+    scatterMethod: dotproduct
+    in:
+      bam: bams
+      sample: samples
+      chrom: chrom
+    out: [fastq1, fastq2]
diff --git a/make-singlechromfastq/yml/bam2singlechromfastq-test.yml b/make-singlechromfastq/yml/bam2singlechromfastq-test.yml
new file mode 100644
index 0000000..1a50a0d
--- /dev/null
+++ b/make-singlechromfastq/yml/bam2singlechromfastq-test.yml
@@ -0,0 +1,7 @@
+bam:
+  class: File
+  format: edam:format_2572
+  location: keep:df46f3dd9eb77e1cfda3fa2b5c165858+60837/marked_dupsERR2122556.sorted.bam
+# Must name the same run as the BAM above; it becomes the FASTQ file prefix.
+sample: "ERR2122556"
+chrom: "chr19"
diff --git a/make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml b/make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml
new file mode 100644
index 0000000..a0ff7a3
--- /dev/null
+++ b/make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml
@@ -0,0 +1,24 @@
+bams:
+  - class: File
+    format: edam:format_2572
+    location: keep:0df1f086f260b8df5c3bd93197d84429+53025/marked_dupsERR2122553.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:0dc5d70a309efb72c062ab34441ec29b+50421/marked_dupsERR2122554.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:02264391f04e717ff90432e49a99f419+50631/marked_dupsERR2122555.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:df46f3dd9eb77e1cfda3fa2b5c165858+60837/marked_dupsERR2122556.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:2656a255e5ac6ce9d59d606bc9bed3a8+60669/marked_dupsERR2122557.sorted.bam
+# One sample name per BAM above, in the same order.
+samples:
+  - "ERR2122553"
+  - "ERR2122554"
+  - "ERR2122555"
+  - "ERR2122556"
+  - "ERR2122557"
+chrom: "chr19"
-- 
2.30.2