updating preprocessing code
author: Sarah Wait Zaranek <swz@curii.com>
Wed, 1 Apr 2020 03:38:30 +0000 (03:38 +0000)
committer: Ward Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:39 +0000 (11:16 -0400)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

src/preprocessing/cwl/createSingleBam-wf.cwl [new file with mode: 0644]
src/preprocessing/cwl/createSingleBam.cwl [new file with mode: 0644]
src/preprocessing/cwl/getfiles.cwl [new file with mode: 0644]
src/preprocessing/runcommand [new file with mode: 0644]
src/preprocessing/yml/createSingleBam-wf.yml [new file with mode: 0644]
src/preprocessing/yml/createSingleBam.yml [new file with mode: 0644]
src/preprocessing/yml/getfiles.yml [new file with mode: 0644]

diff --git a/src/preprocessing/cwl/createSingleBam-wf.cwl b/src/preprocessing/cwl/createSingleBam-wf.cwl
new file mode 100644 (file)
index 0000000..c539025
--- /dev/null
@@ -0,0 +1,38 @@
+cwlVersion: v1.1
+class: Workflow  # two steps: list the .tgz archives in bamdir, then convert each one
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  ScatterFeatureRequirement: {}  # required by the scatter on convert-bams
+
+inputs:
+  bamdir:
+    type: Directory 
+    label: Directory of zipped bam files
+
+outputs:
+  fastqs:
+    type:  # array of arrays: one File[] per scattered convert-bams run
+      type: array
+      items:
+        type: array
+        items: File 
+    outputSource: convert-bams/fastqs
+
+steps:
+  get-bams:
+    run: getfiles.cwl
+    in:
+      bamdir: bamdir
+    out: [tarzipbams,samplenames]  # parallel arrays, consumed pairwise below
+
+  convert-bams:
+    run: createSingleBam.cwl
+    scatter: [tarzipbam,samplename]
+    scatterMethod: dotproduct  # pairs element i of tarzipbams with element i of samplenames
+    in:
+      tarzipbam: get-bams/tarzipbams
+      samplename: get-bams/samplenames
+    out: [fastqs]
diff --git a/src/preprocessing/cwl/createSingleBam.cwl b/src/preprocessing/cwl/createSingleBam.cwl
new file mode 100644 (file)
index 0000000..15994b2
--- /dev/null
@@ -0,0 +1,45 @@
+cwlVersion: v1.1
+class: CommandLineTool  # untar one archive, merge its BAMs, convert the merged BAM to fastq
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: curii/bam2fastq
+  - class: ShellCommandRequirement  # the three commands are chained with unquoted '&&'
+
+hints:
+  ResourceRequirement:
+    ramMin: 4000
+    coresMin: 1 
+    tmpdirMin: 150000  # scratch space: archive is unpacked and merged in runtime.tmpdir
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
+inputs:
+  tarzipbam: File
+  samplename: string
+
+outputs:
+  fastqs:
+    type: File[]
+    outputBinding:
+      glob: "*fastq"  # collects the <samplename>_R#.fastq files written by bam2fastq
+
+arguments:
+  - tar
+  - prefix: "-xvf" 
+    valueFrom: $(inputs.tarzipbam.path)
+  - prefix: "-C"
+    valueFrom: $(runtime.tmpdir)
+  - {valueFrom: '&&', shellQuote: false}
+  - samtools
+  - merge
+  - {valueFrom: $(runtime.tmpdir)/$(inputs.samplename).bam, shellQuote: true}  # built from an input string: keep quoted
+  - {valueFrom: $(runtime.tmpdir)/*.bam, shellQuote: false}  # unquoted so the shell expands the glob
+  - {valueFrom: '&&', shellQuote: false}
+  - /bam2fastq/bam2fastq  
+  - {valueFrom: $(runtime.tmpdir)/$(inputs.samplename).bam, shellQuote: true}  # merged BAM from the step above
+  - prefix: "-o"
+    valueFrom: $(inputs.samplename)_R#.fastq
diff --git a/src/preprocessing/cwl/getfiles.cwl b/src/preprocessing/cwl/getfiles.cwl
new file mode 100644 (file)
index 0000000..3d8d3fd
--- /dev/null
@@ -0,0 +1,40 @@
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool
+label: Create list of bams from directory
+cwlVersion: v1.1
+requirements:
+  InlineJavascriptRequirement: {}
+
+inputs:
+  bamdir:
+    type: Directory
+    label: Directory of input bams
+    loadListing: shallow_listing  # the expression only reads the top-level entries
+outputs:
+  tarzipbams:
+    type: File[]
+    label: Array of bams 
+  samplenames:
+    type: string[]
+    label: Array of sample names
+
+expression: |
+  ${
+    var tarzipbams = [];
+    var samplenames = [];
+
+    for (var i = 0; i < inputs.bamdir.listing.length; i++) {
+      var file = inputs.bamdir.listing[i];
+      if (file.nameext == '.tgz') {
+        var main = file;
+        var sample = file.nameroot;
+        if (sample.slice(-4) == ".bam") { sample = sample.slice(0, -4); }  // strip only a trailing ".bam"
+        tarzipbams.push(main);
+        samplenames.push(sample);
+      }
+    }
+    return {"tarzipbams": tarzipbams, "samplenames": samplenames};
+  }
+
diff --git a/src/preprocessing/runcommand b/src/preprocessing/runcommand
new file mode 100644 (file)
index 0000000..659c7aa
--- /dev/null
@@ -0,0 +1,6 @@
+tar -xvf ./keep/by_id/d25f1c7905d1b16ab8c799a37fc96d56+1805224/hu007B82_1YB27IM.bam.tgz  # unpack one sample archive found under ./keep/by_id
+
+samtools merge WGC071838D.bam WGC071838D_combined.chr*bam  # merge every WGC071838D_combined.chr*bam into a single BAM
+docker run -ti -v=/home/bcosca2/issues/15996/bam/7HL58AX:/7HL58AX curii/bam2fastq  # interactive container, host dir mounted at /7HL58AX
+/bam2fastq/bam2fastq  WGC071838D.bam  # NOTE(review): presumably typed inside the container started above - confirm
+
diff --git a/src/preprocessing/yml/createSingleBam-wf.yml b/src/preprocessing/yml/createSingleBam-wf.yml
new file mode 100644 (file)
index 0000000..32cb9ef
--- /dev/null
@@ -0,0 +1,3 @@
+bamdir:  # matches the bamdir Directory input of createSingleBam-wf.cwl
+  class: Directory 
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224
diff --git a/src/preprocessing/yml/createSingleBam.yml b/src/preprocessing/yml/createSingleBam.yml
new file mode 100644 (file)
index 0000000..57bfb13
--- /dev/null
@@ -0,0 +1,5 @@
+tarzipbam:  # matches the tarzipbam File input of createSingleBam.cwl
+  class: File
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224/hu007B82_1YB27IM.bam.tgz 
+
+samplename: hu007B82_1YB27IM  # archive basename without the .bam.tgz suffix
diff --git a/src/preprocessing/yml/getfiles.yml b/src/preprocessing/yml/getfiles.yml
new file mode 100644 (file)
index 0000000..32cb9ef
--- /dev/null
@@ -0,0 +1,3 @@
+bamdir:  # matches the bamdir Directory input of getfiles.cwl
+  class: Directory 
+  location: keep:d25f1c7905d1b16ab8c799a37fc96d56+1805224