updating cwl
author Sarah Wait Zaranek <swz@curii.com>
Tue, 21 Apr 2020 01:31:20 +0000 (01:31 +0000)
committer Ward Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:16:39 +0000 (11:16 -0400)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

cwl/bwamem-samtools-view.cwl
cwl/calc-coverage.cwl [new file with mode: 0644]
cwl/check-sam.cwl [new file with mode: 0644]
cwl/fix-groups.cwl [new file with mode: 0644]
cwl/gatk-haplotypecaller.cwl
cwl/mark-duplicates.cwl [new file with mode: 0644]
cwl/old/bwa-gatk-wf.cwl [new file with mode: 0644]
cwl/old/samtools-sort.cwl [new file with mode: 0644]
cwl/samtools-fixmate.cwl [new file with mode: 0644]
cwl/samtools-index.cwl
cwl/samtools-sort.cwl

index 044ef74e524732397d06b052087ea288c59a33c7..efc423e27db676afea44d840a826cff7e7584c51 100644 (file)
@@ -1,6 +1,9 @@
 cwlVersion: v1.1
 class: CommandLineTool
 label: Realigning fastqs and converting output to bam
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
 
 requirements:
   DockerRequirement:
@@ -8,7 +11,11 @@ requirements:
   ShellCommandRequirement: {}
   ResourceRequirement:
     ramMin: 26000
-    coresMin: 8
+    coresMin: 16 
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
 
 inputs:
   reference:
@@ -36,17 +43,22 @@ outputs:
 arguments:
   - /bwa-0.7.17/bwa
   - mem
+  - -M
   - -t
   - $(runtime.cores)
   - $(inputs.reference)
   - -R
-  - "@RG\\tID:sample\\tSM:sample\\tLB:sample\\tPL:ILLUMINA"
+  - '@RG\tID:sample\tSM:sample\tLB:sample\tPL:ILLUMINA\tPU:sample1' 
+  - -c
+  - 250
   - $(inputs.fastq1)
   - $(inputs.fastq2)
   - shellQuote: false
     valueFrom: '|'
   - samtools
   - view
+  - -@
+  - $(runtime.cores)
   - -b
   - -S
   - shellQuote: false
diff --git a/cwl/calc-coverage.cwl b/cwl/calc-coverage.cwl
new file mode 100644 (file)
index 0000000..b1e9373
--- /dev/null
@@ -0,0 +1,39 @@
+cwlVersion: v1.1  # CWL tool description: runs /gatk/gatk MarkDuplicates on a single BAM input
+class: CommandLineTool
+label: Marking Duplicates using Picard   # NOTE(review): file is named calc-coverage.cwl but the command below is MarkDuplicates - confirm intent
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk  # no image tag is specified here
+
+hints:
+  ResourceRequirement:
+    ramMin: 20000  # listed under hints, not requirements; JVM heap below is capped with -Xmx8G
+    coresMin: 4    
+
+inputs:
+  bam:
+    type: File  # BAM passed to -I by path
+
+outputs:
+  dupbam:
+    type: File
+    outputBinding:
+      glob: "*.bam"  # expected to pick up the marked_dups<basename> file written by -O
+  dupmetrics:
+    type: File
+    outputBinding:
+      glob: "*.txt"  # expected to pick up metrics.txt written by -M
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx8G" 
+  - MarkDuplicates
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.path)
+  - prefix: "-O"
+    valueFrom: marked_dups$(inputs.bam.basename)  # relative name: "marked_dups" + input file name
+  - prefix: "-M"
+    valueFrom: "metrics.txt" 
diff --git a/cwl/check-sam.cwl b/cwl/check-sam.cwl
new file mode 100644 (file)
index 0000000..df15439
--- /dev/null
@@ -0,0 +1,51 @@
+cwlVersion: v1.1  # CWL tool description: runs /gatk/gatk ValidateSamFile on one SAM/BAM against a reference
+class: CommandLineTool
+label: Validate Sam using Picard 
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk  # no image tag is specified here
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.sam)  # stages the input into the working directory; -I below refers to it by basename
+
+hints:
+  ResourceRequirement:
+    ramMin: 20000  # listed under hints, not requirements; JVM heap below is capped with -Xmx8G
+    coresMin: 4    
+
+inputs:
+  sam:
+    type: File
+
+  reference:
+    type: File
+    secondaryFiles:  # companion files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret: replaces the reference's last extension with .dict
+
+outputs:
+  metrics:
+    type: File
+    outputBinding:
+      glob: "*.txt"  # expected to pick up metrics.txt written by -O
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx8G" 
+  - ValidateSamFile 
+  - prefix: "-I"
+    valueFrom: $(inputs.sam.basename)  # basename only; relies on the staging declared above
+  - prefix: "-M"
+    valueFrom: "SUMMARY"  # literal value passed to -M
+  - prefix: "-O"
+    valueFrom: "metrics.txt" 
+  - prefix: "-R"
+    valueFrom: $(inputs.reference.path)
diff --git a/cwl/fix-groups.cwl b/cwl/fix-groups.cwl
new file mode 100644 (file)
index 0000000..475814e
--- /dev/null
@@ -0,0 +1,43 @@
+cwlVersion: v1.1  # CWL tool description: runs /gatk/gatk AddOrReplaceReadGroups on a single BAM input
+class: CommandLineTool
+label: Adding or replacing read groups using Picard
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk  # no image tag is specified here
+
+hints:
+  ResourceRequirement:
+    ramMin: 20000  # listed under hints, not requirements; JVM heap below is capped with -Xmx8G
+    coresMin: 4
+
+inputs:
+  bam:
+    type: File  # BAM passed to -I by path
+
+outputs:
+  fixedrgbam:
+    type: File
+    outputBinding:
+      glob: "*.bam"  # expected to pick up the fixedrg<basename> file written by -O
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx8G"
+  - AddOrReplaceReadGroups
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.path)
+  - prefix: "-O"
+    valueFrom: fixedrg$(inputs.bam.basename)  # relative name: "fixedrg" + input file name
+  - prefix: "-ID"  # NOTE(review): -ID/-LB/-PL/-PU/-SM below are fixed literals, not taken from inputs - confirm
+    valueFrom: "H0164.2"
+  - prefix: "-LB"
+    valueFrom: "library1"
+  - prefix: "-PL"
+    valueFrom: "illumina"
+  - prefix: "-PU"
+    valueFrom: "H0164ALXX140820.2"
+  - prefix: "-SM"
+    valueFrom: "sample1"
index f10178301ae677c272e949e81bae0ff1dfd41689..9679d2af48df4ba1a117a102d2b72ae889dec41d 100644 (file)
@@ -2,6 +2,10 @@ cwlVersion: v1.1
 class: CommandLineTool
 label: Germline variant calling using GATK with output gvcf
 
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
 requirements:
   DockerRequirement:
     dockerPull: broadinstitute/gatk
@@ -10,9 +14,12 @@ requirements:
       - $(inputs.bam)
 
 hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+    keep_cache: 1024 
   ResourceRequirement:
-    ramMin: 20000
-    coresMin: 4    
+    ramMin: 5000
+    coresMin: 2    
 
 inputs:
   bam:
@@ -41,7 +48,7 @@ baseCommand: /gatk/gatk
 
 arguments:
   - "--java-options"
-  - "-Xmx8G" 
+  - "-Xmx4G" 
   - HaplotypeCaller
   - prefix: "-R"
     valueFrom: $(inputs.reference)
diff --git a/cwl/mark-duplicates.cwl b/cwl/mark-duplicates.cwl
new file mode 100644 (file)
index 0000000..8cdbb9b
--- /dev/null
@@ -0,0 +1,44 @@
+cwlVersion: v1.1  # CWL tool description: runs /gatk/gatk MarkDuplicates on a single BAM input
+class: CommandLineTool
+label: Marking Duplicates using Picard 
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: broadinstitute/gatk
+
+hints:
+  ResourceRequirement:
+    ramMin: 20000
+    coresMin: 4    
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
+inputs:
+  bam:
+    type: File
+
+outputs:
+  dupbam:
+    type: File
+    outputBinding:
+      glob: "*.bam"
+  dupmetrics:
+    type: File
+    outputBinding:
+      glob: "*.txt"
+
+baseCommand: /gatk/gatk
+
+arguments:
+  - "--java-options"
+  - "-Xmx8G"   # JVM heap cap; ResourceRequirement above asks for ramMin 20000
+  - MarkDuplicates
+  - prefix: "-I"
+    valueFrom: $(inputs.bam.path)
+  - prefix: "-O"
+    valueFrom: marked_dups$(inputs.bam.basename)  # relative name: "marked_dups" + input file name; expected to match glob *.bam
+  - prefix: "-M"
+    valueFrom: "metrics.txt"   # expected to match glob *.txt
diff --git a/cwl/old/bwa-gatk-wf.cwl b/cwl/old/bwa-gatk-wf.cwl
new file mode 100644 (file)
index 0000000..56c1758
--- /dev/null
@@ -0,0 +1,65 @@
+cwlVersion: v1.1  # CWL workflow: fastqc, then bwa mem/samtools view -> sort -> index -> haplotypecaller
+class: Workflow
+
+requirements:
+  - class: SubworkflowFeatureRequirement  # NOTE(review): only needed if a step runs a Workflow; step files are not visible here - confirm
+
+inputs:
+  fastq1: File
+  fastq2: File
+  reference:
+    type: File
+    secondaryFiles:  # companion files expected next to the reference
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+      - ^.dict  # caret: replaces the reference's last extension with .dict
+  sample: string  # forwarded unchanged to the alignment, sort and variant-calling steps
+
+outputs:
+  qc-html:
+    type: File[]
+    outputSource: fastqc/out-html
+  qc-zip:
+    type: File[]
+    outputSource: fastqc/out-zip 
+  vcf:
+    type: File
+    outputSource: haplotypecaller/vcf
+
+steps:
+  fastqc:
+    run: fastqc.cwl  # NOTE(review): run paths are relative to this file (cwl/old/) - confirm the step files resolve from there
+    in:
+      fastq1: fastq1
+      fastq2: fastq2
+    out: [out-html, out-zip]
+  bwamem-samtools-view:
+    run: bwamem-samtools-view.cwl
+    in:
+      fastq1: fastq1
+      fastq2: fastq2
+      reference: reference
+      sample: sample
+    out: [bam]
+  samtools-sort:
+    run: samtools-sort.cwl
+    in:
+      bam: bwamem-samtools-view/bam  # consumes the aligner step's output
+      sample: sample
+    out: [out]
+  samtools-index:
+    run: samtools-index.cwl
+    in:
+      bam: samtools-sort/out  # consumes the sorted BAM
+    out: [out]
+  haplotypecaller:
+    run: gatk-haplotypecaller.cwl
+    in:
+      reference: reference
+      bam: samtools-index/out  # consumes the index step's output
+      sample: sample
+    out: [vcf]
diff --git a/cwl/old/samtools-sort.cwl b/cwl/old/samtools-sort.cwl
new file mode 100644 (file)
index 0000000..cc7c23f
--- /dev/null
@@ -0,0 +1,30 @@
+cwlVersion: v1.1  # CWL tool description: runs "samtools sort" on one BAM, naming the output after the sample
+class: CommandLineTool
+
+requirements:
+  DockerRequirement:
+    dockerPull: curii/bwa-samtools-picard  # no image tag is specified here
+  ShellCommandRequirement: {}
+  ResourceRequirement:
+    ramMin: 10000
+    coresMin: 4  # lower bound for runtime.cores, which sets the thread count below
+
+inputs:
+  bam: File
+  sample: string  # used as the output file name prefix
+
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: "*sorted.bam"  # matches <sample>.sorted.bam written by -o
+
+baseCommand: samtools
+
+arguments:
+  - sort
+  - -@  # thread-count option; "-t" means sort-by-TAG in samtools sort, so it must not receive a core count
+  - $(runtime.cores)
+  - $(inputs.bam)
+  - -o
+  - $(runtime.outdir)/$(inputs.sample).sorted.bam
diff --git a/cwl/samtools-fixmate.cwl b/cwl/samtools-fixmate.cwl
new file mode 100644 (file)
index 0000000..61d5c1f
--- /dev/null
@@ -0,0 +1,37 @@
+cwlVersion: v1.1  # CWL tool description: runs "samtools fixmate -O bam" on one BAM, naming the output after the sample
+class: CommandLineTool
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
+requirements:
+  DockerRequirement:
+    dockerPull: curii/bwa-samtools-picard
+  ShellCommandRequirement: {}  # NOTE(review): no argument below sets shellQuote: false - confirm this is still needed
+  ResourceRequirement:
+    ramMin: 10000
+    coresMin: 4  # NOTE(review): no thread option is passed to samtools below - confirm the cores are used
+
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
+inputs:
+  bam: File
+  sample: string  # used as the output file name prefix
+
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: "*fixed.bam"  # matches <sample>.fixed.bam written as the last positional argument
+
+baseCommand: samtools
+
+arguments:
+  - fixmate 
+  - -O
+  - "bam"
+  - $(inputs.bam.path)
+  - $(runtime.outdir)/$(inputs.sample).fixed.bam
index 58ccbf76514879a4967991d7be18702ccd7a76d6..3876f3d2f233e80b19664a42b69c58bfd0eda2a5 100644 (file)
@@ -1,6 +1,10 @@
 cwlVersion: v1.1
 class: CommandLineTool
 
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
 requirements:
   DockerRequirement:
     dockerPull: curii/bwa-samtools-picard
@@ -8,6 +12,10 @@ requirements:
     listing:
       - $(inputs.bam)
 
+hints:
+  arv:RuntimeConstraints:
+    outputDirType: keep_output_dir
+
 inputs:
   bam: File
 
index cc7c23f0535b4c0124652ea36712779cf84da702..c94d681a99aa5b73ef7b29142c99c47ec19746e7 100644 (file)
@@ -1,14 +1,26 @@
 cwlVersion: v1.1
 class: CommandLineTool
 
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+
 requirements:
   DockerRequirement:
     dockerPull: curii/bwa-samtools-picard
   ShellCommandRequirement: {}
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bam)
   ResourceRequirement:
-    ramMin: 10000
+    ramMin: 20000
     coresMin: 4
 
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 9216 
+    outputDirType: keep_output_dir
+
 inputs:
   bam: File
   sample: string
@@ -23,8 +35,10 @@ baseCommand: samtools
 
 arguments:
   - sort
-  - -t
+  - -@
   - $(runtime.cores)
-  - $(inputs.bam)
+  - $(inputs.bam.basename)
+  - -m
+  - '2G'
   - -o
-  - $(runtime.outdir)/$(inputs.sample).sorted.bam
+  - $(inputs.sample).sorted.bam