Add workflow to generate single chromosome fastqs
author Jiayong Li <jli@veritasgenetics.com>
Tue, 15 Dec 2020 22:33:51 +0000 (22:33 +0000)
committer Jiayong Li <jli@veritasgenetics.com>
Tue, 15 Dec 2020 22:33:51 +0000 (22:33 +0000)
Arvados-DCO-1.1-Signed-off-by: Jiayong Li <jli@curii.com>
refs #17049

make-singlechromfastq/bam2singlechromfastq.cwl [new file with mode: 0644]
make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl [new file with mode: 0644]
make-singlechromfastq/yml/bam2singlechromfastq-test.yml [new file with mode: 0644]
make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml [new file with mode: 0644]

diff --git a/make-singlechromfastq/bam2singlechromfastq.cwl b/make-singlechromfastq/bam2singlechromfastq.cwl
new file mode 100644 (file)
index 0000000..5027631
--- /dev/null
@@ -0,0 +1,77 @@
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+ arv: "http://arvados.org/cwl#"
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+cwlVersion: v1.1
+class: CommandLineTool  # one shell pipeline: samtools view | samtools sort -n | samtools fastq
+label: Convert one chromosome of bam to fastqs
+requirements:
+  ShellCommandRequirement: {}  # needed for the shellQuote: false "|" arguments below
+hints:
+  DockerRequirement:
+    dockerPull: curii/bwa-samtools
+  ResourceRequirement:
+    ramMin: 20000  # MiB (CWL ResourceRequirement unit)
+    coresMin: 4  # exposed to the command line as runtime.cores
+  arv:RuntimeConstraints:  # Arvados-specific extension (arv namespace declared above)
+    keep_cache: 9216  # Keep mount cache size; MiB per the Arvados CWL extension docs
+    outputDirType: keep_output_dir  # Arvados option: output directory backed by a Keep mount
+  SoftwareRequirement:
+    packages:
+      Samtools:
+        specs: [ "https://identifiers.org/rrid/RRID:SCR_002105" ]
+        version: [ "1.10" ]
+inputs:
+  bam:
+    type: File
+    #format: edam:format_2572 # BAM
+    label: Alignments in BAM format
+    secondaryFiles: [.bai]  # BAM index expected next to the BAM
+  sample:
+    type: string  # used as the prefix of both output file names
+    label: Sample name
+  chrom:
+    type: string  # passed to samtools view as the region to extract
+    label: Chromosome name
+outputs:
+  fastq1:
+    type: File
+    #format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R1)
+    outputBinding:
+      glob: "*_1.fastq.gz"  # picks up the file written by the "-1" argument
+  fastq2:
+    type: File
+    #format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R2)
+    outputBinding:
+      glob: "*_2.fastq.gz"  # picks up the file written by the "-2" argument
+baseCommand: [samtools, view]  # stage 1: select alignments from the input BAM
+arguments:
+  - "-b"  # emit BAM so the next stage can read it from the pipe
+  - $(inputs.bam)  # input BAM
+  - $(inputs.chrom)  # region to extract
+  - shellQuote: false  # leave the pipe unquoted so the shell interprets it
+    valueFrom: "|"
+  - "samtools"  # stage 2: samtools sort
+  - "sort"
+  - "-n"  # sort by read name
+  - "-"  # read from stdin
+  - shellQuote: false  # second unquoted pipe
+    valueFrom: "|"
+  - "samtools"  # stage 3: samtools fastq
+  - "fastq"
+  - "-@"  # thread count option; value is the next argument
+  - $(runtime.cores)
+  - "-"  # read from stdin
+  - "-N"  # append /1 and /2 to read names
+  - prefix: "-0"  # reads with both or neither READ1/READ2 flag set
+    valueFrom: "/dev/null"  # discarded
+  - prefix: "-s"  # singleton reads
+    valueFrom: "/dev/null"  # discarded
+  - prefix: "-1"  # R1 output, matched by the fastq1 glob
+    valueFrom: $(inputs.sample)_1.fastq.gz
+  - prefix: "-2"  # R2 output, matched by the fastq2 glob
+    valueFrom: $(inputs.sample)_2.fastq.gz
diff --git a/make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl b/make-singlechromfastq/scatter-bam2singlechromfastq-wf.cwl
new file mode 100644 (file)
index 0000000..9ef1133
--- /dev/null
@@ -0,0 +1,54 @@
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+cwlVersion: v1.1
+class: Workflow  # runs bam2singlechromfastq.cwl once per (bam, sample) pair
+label: Scatter to convert one chromosome of bam to fastqs
+requirements:
+  ScatterFeatureRequirement: {}  # required by the scatter on the step below
+
+inputs:
+  bams:
+    type:
+      type: array
+      items: File
+    format: edam:format_2572 # BAM
+    secondaryFiles: [.bai]  # each BAM is expected to come with its .bai index
+    label: Alignments in BAM format
+  samples:  # paired element-wise with bams by the dotproduct scatter
+    type:
+      type: array
+      items: string
+    label: Sample names
+  chrom:  # not scattered: the same value is passed to every job
+    type: string
+    label: Chromosome name
+
+outputs:
+  fastq1s:
+    type:
+      type: array
+      items: File
+    format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R1)
+    outputSource: bam2singlechromfastq/fastq1  # gathered from the scattered step
+  fastq2s:
+    type:
+      type: array
+      items: File
+    format: edam:format_1930 # FASTQ
+    label: One of set of pair-end FASTQs (R2)
+    outputSource: bam2singlechromfastq/fastq2  # gathered from the scattered step
+
+steps:
+  bam2singlechromfastq:
+    run: bam2singlechromfastq.cwl
+    scatter: [bam, sample]  # step inputs that vary per job
+    scatterMethod: dotproduct  # job i gets bams[i] and samples[i]
+    in:
+      bam: bams
+      sample: samples
+      chrom: chrom
+    out: [fastq1, fastq2]
diff --git a/make-singlechromfastq/yml/bam2singlechromfastq-test.yml b/make-singlechromfastq/yml/bam2singlechromfastq-test.yml
new file mode 100644 (file)
index 0000000..1a50a0d
--- /dev/null
@@ -0,0 +1,6 @@
+bam:  # input alignments for the single-tool test run
+  class: File
+  format: edam:format_2572
+  location: keep:df46f3dd9eb77e1cfda3fa2b5c165858+60837/marked_dupsERR2122556.sorted.bam
+sample: "ERR2122556"  # output FASTQ prefix; must match the sample ID of the BAM above
+chrom: "chr19"  # region extracted from the BAM
diff --git a/make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml b/make-singlechromfastq/yml/scatter-bam2singlechromfastq-wf-test.yml
new file mode 100644 (file)
index 0000000..a0ff7a3
--- /dev/null
@@ -0,0 +1,23 @@
+bams:  # one BAM per sample; order must match the samples list below
+  - class: File
+    format: edam:format_2572
+    location: keep:0df1f086f260b8df5c3bd93197d84429+53025/marked_dupsERR2122553.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:0dc5d70a309efb72c062ab34441ec29b+50421/marked_dupsERR2122554.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:02264391f04e717ff90432e49a99f419+50631/marked_dupsERR2122555.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:df46f3dd9eb77e1cfda3fa2b5c165858+60837/marked_dupsERR2122556.sorted.bam
+  - class: File
+    format: edam:format_2572
+    location: keep:2656a255e5ac6ce9d59d606bc9bed3a8+60669/marked_dupsERR2122557.sorted.bam
+samples:  # samples[i] names the FASTQs produced from bams[i]
+  - "ERR2122553"
+  - "ERR2122554"
+  - "ERR2122555"
+  - "ERR2122556"
+  - "ERR2122557"
+chrom: "chr19"  # same region is extracted from every BAM