updating for metadata and yml to check command line cwl
author Sarah Wait Zaranek <swz@curii.com>
Mon, 29 Jun 2020 21:42:02 +0000 (21:42 +0000)
committer Sarah Wait Zaranek <swz@curii.com>
Mon, 29 Jun 2020 21:42:02 +0000 (21:42 +0000)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

WGS-processing/cwl/helper/gatk-haplotypecaller-with-interval.cwl
WGS-processing/cwl/helper/gatk-selectvariants.cwl
WGS-processing/cwl/helper/gatk-wf-with-interval.cwl
WGS-processing/cwl/helper/report-wf.cwl
WGS-processing/cwl/helper/scatter-gatk-wf-with-interval.cwl
WGS-processing/cwl/wgs-processing-wf.cwl
WGS-processing/yml/helper/gatk-baserecalibrator-with-interval.yml [moved from WGS-processing/yml/helper/gatk-baserecalibrator.yml with 75% similarity]
WGS-processing/yml/helper/scatter-gatk-wf-with-interval.yml

index 88c78e04654581896209c162a7c0dc7101519887..22ce3f6b8a3521d6bca2308b91dfb8b3949f5835 100644 (file)
@@ -1,10 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
-label: Germline variant calling using GATK with output gvcf
-
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
+label: Call variants with GATK HaplotypeCaller
 
 requirements:
   DockerRequirement:
@@ -17,14 +13,23 @@ hints:
   ResourceRequirement:
     ramMin: 3500
     coresMin: 2   
+  SoftwareRequirement:
+    packages:
+      GATK:
+        specs: [ "https://identifiers.org/rrid/RRID:SCR_001876" ]
+        version: [ "4.1.7" ]
 
 inputs:
   bam:
     type: File
+    format: edam:format_2572 # BAM
+    label: Recalibrated BAM for given interval
     secondaryFiles:
       - .bai
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
@@ -35,11 +40,16 @@ inputs:
       - ^.dict
   intervallist:
     type: File
-  sample: string
+    label: Scatter intervals file
+  sample: 
+    type: string
+    label: Sample Name
 
 outputs:
   gvcf:
     type: File
+    format: edam:format_3016 # GVCF
+    label: GVCF for given interval
     secondaryFiles:
       - .tbi
     outputBinding:
@@ -67,3 +77,16 @@ arguments:
     valueFrom: "20"
   - prefix: "-GQB"
     valueFrom: "60"
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+ arv: "http://arvados.org/cwl#"
+ cwltool: "http://commonwl.org/cwltool#"
+
+$schemas:
+ - https://schema.org/version/latest/schema.rdf
+ - http://edamontology.org/EDAM_1.18.owl
index 66b5d5c0bb552cb89237881dbd54a41dff198e76..8e7031abe7581905650ac2cd220689357856aaf5 100644 (file)
@@ -1,9 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
-
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
+label: Filter unused alternates 
 
 requirements:
   DockerRequirement:
@@ -16,14 +13,23 @@ hints:
   ResourceRequirement:
     ramMin: 5000
     coresMin: 2
+  SoftwareRequirement:
+    packages:
+      GATK:
+        specs: [ "https://identifiers.org/rrid/RRID:SCR_001876" ]
+        version: [ "4.1.7" ]
 
 inputs:
   gvcf:
     type: File
+    format: edam:format_3016 # GVCF
+    label: GVCF for given interval
     secondaryFiles:
       - .tbi
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
@@ -32,11 +38,15 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-  sample: string
+  sample: 
+    type: string
+    label: Sample Name
 
 outputs:
   filteredgvcf:
     type: File
+    format: edam:format_3016 # GVCF
+    label: Given interval filtered GVCF 
     outputBinding:
       glob: "*g.vcf.gz"
 
@@ -54,3 +64,16 @@ arguments:
     valueFrom: $(inputs.gvcf.path)
   - prefix: "-O"
     valueFrom: selected$(inputs.gvcf.basename)
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+ arv: "http://arvados.org/cwl#"
+ cwltool: "http://commonwl.org/cwltool#"
+
+$schemas:
+ - https://schema.org/version/latest/schema.rdf
+ - http://edamontology.org/EDAM_1.18.owl
index ef902099c066015699866acad47b4129da3ffcc1..cf815a5178cb9ab03c769f5acc11de7fe5f8fd8e 100644 (file)
@@ -1,6 +1,6 @@
 cwlVersion: v1.1
 class: Workflow
-label: Variant calling workflow for single interval 
+label: Variant calling workflow for given interval 
 
 requirements:
   - class: SubworkflowFeatureRequirement
@@ -41,7 +41,7 @@ outputs:
   gvcf:
     type: File
     format: edam:format_3016 # GVCF
-    label: single interval filtered GVCF
+    label: Given interval filtered GVCF
     outputSource: selectvariants/filteredgvcf
 
 steps:
index de8ab3f67ce9b20bfd392a1ce31c2300926a00a6..30857f46e7eaeb1cfa285d16dc1bff19b786ddfb 100644 (file)
@@ -1,5 +1,6 @@
 cwlVersion: v1.1
 class: Workflow
+label: Report generation workflow
 
 inputs:
   gvcf: 
index d740222db6e399a993ffb81d1767d80ba001426f..17bb13eb977846d37909e111188f5e2ebde3dbce 100644 (file)
@@ -1,6 +1,6 @@
 cwlVersion: v1.1
 class: Workflow
-label: Variant calling workflow scattered over interval splits 
+label: Scattered variant calling workflow  
 
 requirements:
   - class: SubworkflowFeatureRequirement
index 9c1e78a86d17e8b74a6422b732160be57feda4c0..474a5e308fcd7fc0a3a671d579baa59891fa92fa 100644 (file)
@@ -40,11 +40,11 @@ inputs:
     label: Function used to create HTML report
   headhtml: 
     type: File
-    format: edam:format_1964 # HTML
+    format: edam:format_2331# HTML
     label: Header for HTML report
   tailhtml: 
     type: File
-    format: edam:format_1964 # HTML
+    format: edam:format_2331 # HTML
     label: Footer for HTML report
 
 outputs:
similarity index 75%
rename from WGS-processing/yml/helper/gatk-baserecalibrator.yml
rename to WGS-processing/yml/helper/gatk-baserecalibrator-with-interval.yml
index d4b3c50a6ffa7ebaeb74c94f1d4c966a7a5452f6..7e6b16a9696b70aa713f604c4b96bcbac81873c5 100644 (file)
@@ -12,3 +12,6 @@ knownsites1:
   class: File
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz
 
+intervallist:
+  class: File
+  location: keep:4d4947f6637f0acec32580e09d991967+815/0000-scattered.interval_list
index 4909ce25bebd48ad6f446ef8ec9e5303d8e8dde9..f6d49433fc0f88f263e1d51665c7732e7e3eb95d 100644 (file)
@@ -1,11 +1,15 @@
 bam:
   class: File
   location: keep:377c02de508dff9e4474f9a48d754933+53821/marked_dupsERR1726424.sorted.bam 
+
 reference:
   class: File
   location: keep:a3af04432df3d71d22f2fe8be549ba96+5974/hg38.fa
+
 sample: ERR1726424
+
 scattercount: '32'
+
 knownsites1:
   class: File
   location: keep:7c0b13bda857fa15d88c1039182f69d5+8052/Homo_sapiens_assembly38.known_indels.vcf.gz