From 4452835b3d539a461619efb8c935ccbbc00a35ab Mon Sep 17 00:00:00 2001
From: Sarah Wait Zaranek
Date: Wed, 1 Apr 2020 03:38:30 +0000
Subject: [PATCH] updating preprocessing code

Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek

no issue #
---
Review notes (this section is not part of the commit message):
 * createSingleBam.cwl: the merged-BAM path is no longer passed with
   shellQuote: false; only the '*.bam' glob and the '&&' separators
   still bypass shell quoting.
 * getfiles.cwl: ".bam" is stripped from the sample name only when it
   is the trailing extension of the archive's nameroot.
 * Both changes are in-place line edits, so hunk sizes and the diffstat
   are unchanged; the blob ids on the "index" lines are those of the
   original patch.

 src/preprocessing/cwl/createSingleBam-wf.cwl | 38 +++++++++++++++++
 src/preprocessing/cwl/createSingleBam.cwl    | 45 ++++++++++++++++++++
 src/preprocessing/cwl/getfiles.cwl           | 40 +++++++++++++++++
 src/preprocessing/runcommand                 |  6 +++
 src/preprocessing/yml/createSingleBam-wf.yml |  3 ++
 src/preprocessing/yml/createSingleBam.yml    |  5 +++
 src/preprocessing/yml/getfiles.yml           |  3 ++
 7 files changed, 140 insertions(+)
 create mode 100644 src/preprocessing/cwl/createSingleBam-wf.cwl
 create mode 100644 src/preprocessing/cwl/createSingleBam.cwl
 create mode 100644 src/preprocessing/cwl/getfiles.cwl
 create mode 100644 src/preprocessing/runcommand
 create mode 100644 src/preprocessing/yml/createSingleBam-wf.yml
 create mode 100644 src/preprocessing/yml/createSingleBam.yml
 create mode 100644 src/preprocessing/yml/getfiles.yml

diff --git a/src/preprocessing/cwl/createSingleBam-wf.cwl b/src/preprocessing/cwl/createSingleBam-wf.cwl
new file mode 100644
index 0000000..c539025
--- /dev/null
+++ b/src/preprocessing/cwl/createSingleBam-wf.cwl
@@ -0,0 +1,38 @@
+cwlVersion: v1.1
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  ScatterFeatureRequirement: {}
+
+inputs:
+  bamdir:
+    type: Directory
+    label: Directory of zipped bam files
+
+outputs:
+  fastqs:
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    outputSource: convert-bams/fastqs  # one File[] per scattered archive
+
+steps:
+  get-bams:
+    run: getfiles.cwl
+    in:
+      bamdir: bamdir
+    out: [tarzipbams,samplenames]
+
+  convert-bams:
+    run: createSingleBam.cwl
+    scatter: [tarzipbam,samplename]
+    scatterMethod: dotproduct  # pair the i-th archive with the i-th sample name
+    in:
+      tarzipbam: get-bams/tarzipbams
+      samplename: get-bams/samplenames
+    out: [fastqs]
diff --git a/src/preprocessing/cwl/createSingleBam.cwl b/src/preprocessing/cwl/createSingleBam.cwl
new file mode 100644
index 0000000..15994b2
--- /dev/null
+++ b/src/preprocessing/cwl/createSingleBam.cwl
@@ -0,0 +1,45 @@
+cwlVersion: v1.1
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: curii/bam2fastq
+  - class: ShellCommandRequirement  # needed for the '&&' chaining and the glob below
+
+hints:
+  ResourceRequirement:
+    ramMin: 4000
+    coresMin: 1
+    tmpdirMin: 150000
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
+inputs:
+  tarzipbam: File
+  samplename: string
+
+outputs:
+  fastqs:
+    type: File[]
+    outputBinding:
+      glob: "*fastq"
+
+arguments:
+  - tar
+  - prefix: "-xvf"
+    valueFrom: $(inputs.tarzipbam.path)
+  - prefix: "-C"
+    valueFrom: $(runtime.tmpdir)  # extract the archive into the tmpdir
+  - {valueFrom: '&&', shellQuote: false}
+  - samtools
+  - merge
+  - {valueFrom: $(runtime.tmpdir)/$(inputs.samplename).bam}  # merge output; default shell quoting keeps it one argument
+  - {valueFrom: $(runtime.tmpdir)/*.bam, shellQuote: false}  # left unquoted so the shell expands the glob
+  - {valueFrom: '&&', shellQuote: false}
+  - /bam2fastq/bam2fastq
+  - {valueFrom: $(runtime.tmpdir)/$(inputs.samplename).bam}  # the merged BAM written by samtools merge above
+  - prefix: "-o"
+    valueFrom: $(inputs.samplename)_R#.fastq
diff --git a/src/preprocessing/cwl/getfiles.cwl b/src/preprocessing/cwl/getfiles.cwl
new file mode 100644
index 0000000..3d8d3fd
--- /dev/null
+++ b/src/preprocessing/cwl/getfiles.cwl
@@ -0,0 +1,40 @@
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool
+label: Create list of bams from directory
+cwlVersion: v1.1
+requirements:
+  InlineJavascriptRequirement: {}
+
+inputs:
+  bamdir:
+    type: Directory
+    label: Directory of input bams
+    loadListing: shallow_listing  # the expression reads inputs.bamdir.listing
+
+outputs:
+  tarzipbams:
+    type: File[]
+    label: Array of bams
+  samplenames:
+    type: string[]
+    label: Array of sample names
+
+expression: |
+  ${
+    var tarzipbams = [];
+    var samplenames = [];
+
+    for (var i = 0; i < inputs.bamdir.listing.length; i++) {
+      var file = inputs.bamdir.listing[i];
+      if (file.nameext == '.tgz') {  // only .tgz entries are collected
+        var main = file;
+        var sample = file.nameroot;
+        sample = sample.replace(/\.bam$/, "");  // drop a trailing ".bam" only
+        tarzipbams.push(main);
+        samplenames.push(sample);  // same index as its archive in tarzipbams
+      }
+    }
+    return {"tarzipbams": tarzipbams, "samplenames": samplenames};
+  }
+
diff --git a/src/preprocessing/runcommand b/src/preprocessing/runcommand
new file mode 100644
index 0000000..659c7aa
--- /dev/null
+++ b/src/preprocessing/runcommand
@@ -0,0 +1,6 @@
+tar -xvf ./keep/by_id/d25f1c7905d1b16ab8c799a37fc96d56+1805224/hu007B82_1YB27IM.bam.tgz
+
+samtools merge WGC071838D.bam WGC071838D_combined.chr*bam
+docker run -ti -v=/home/bcosca2/issues/15996/bam/7HL58AX:/7HL58AX curii/bam2fastq
+/bam2fastq/bam2fastq WGC071838D.bam
+
diff --git a/src/preprocessing/yml/createSingleBam-wf.yml b/src/preprocessing/yml/createSingleBam-wf.yml
new file mode 100644
index 0000000..32cb9ef
--- /dev/null
+++ b/src/preprocessing/yml/createSingleBam-wf.yml
@@ -0,0 +1,3 @@
+bamdir:
+  class: Directory
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224
diff --git a/src/preprocessing/yml/createSingleBam.yml b/src/preprocessing/yml/createSingleBam.yml
new file mode 100644
index 0000000..57bfb13
--- /dev/null
+++ b/src/preprocessing/yml/createSingleBam.yml
@@ -0,0 +1,5 @@
+tarzipbam:
+  class: File
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224/hu007B82_1YB27IM.bam.tgz
+
+samplename: hu007B82_1YB27IM
diff --git a/src/preprocessing/yml/getfiles.yml b/src/preprocessing/yml/getfiles.yml
new file mode 100644
index 0000000..32cb9ef
--- /dev/null
+++ b/src/preprocessing/yml/getfiles.yml
@@ -0,0 +1,3 @@
+bamdir:
+  class: Directory
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224
-- 
2.30.2