Adding labels and other metadata
author: Sarah Wait Zaranek <swz@curii.com>
Fri, 26 Jun 2020 01:52:47 +0000 (01:52 +0000)
committer: Sarah Wait Zaranek <swz@curii.com>
Fri, 26 Jun 2020 01:52:47 +0000 (01:52 +0000)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>
no issue #

WGS-processing/cwl/helper/.gatk-splitintervals.cwl.swp [new file with mode: 0644]
WGS-processing/cwl/helper/gatk-baserecalibrator-with-interval.cwl
WGS-processing/cwl/helper/gatk-wf-with-interval.cwl

diff --git a/WGS-processing/cwl/helper/.gatk-splitintervals.cwl.swp b/WGS-processing/cwl/helper/.gatk-splitintervals.cwl.swp
new file mode 100644 (file)
index 0000000..c7eb3dc
Binary files /dev/null and b/WGS-processing/cwl/helper/.gatk-splitintervals.cwl.swp differ
index dec324463d60e5661792a8ad59989b99597c198a..71cb27fa94283c45f2faae389735ad56f27421f8 100644 (file)
@@ -1,10 +1,6 @@
 cwlVersion: v1.1
 class: CommandLineTool
 cwlVersion: v1.1
 class: CommandLineTool
-label: Generating recalibration table for BQSR 
-
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-  cwltool: "http://commonwl.org/cwltool#"
+label: Generate recalibration table for BQSR 
 
 requirements:
   DockerRequirement:
 
 requirements:
   DockerRequirement:
@@ -20,14 +16,23 @@ hints:
   ResourceRequirement:
     ramMin: 5000
     coresMin: 2
   ResourceRequirement:
     ramMin: 5000
     coresMin: 2
+  SoftwareRequirement:
+    packages:
+      GATK:
+        specs: [ "https://identifiers.org/rrid/RRID:SCR_001876" ]
+        version: [ "4.1.7" ]
 
 inputs:
   bam:
     type: File
 
 inputs:
   bam:
     type: File
+    format: edam:format_2572 # BAM
+    label: Indexed sorted BAM with labeled duplicates
     secondaryFiles:
      - .bai
   reference:
     type: File
     secondaryFiles:
      - .bai
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
     secondaryFiles:
       - .amb
       - .ann
@@ -36,17 +41,23 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
-  sample: string
+  sample:
+    type: string
+    label: Sample Name
   knownsites1: 
     type: File
   knownsites1: 
     type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
     secondaryFiles:
       - .tbi
   intervallist:
     type: File
     secondaryFiles:
       - .tbi
   intervallist:
     type: File
+    label: Scatter intervals file
 
 outputs:
   recaltable:
     type: File
 
 outputs:
   recaltable:
     type: File
+    label: Recalibration table
     outputBinding:
       glob: "*.table"
 
     outputBinding:
       glob: "*.table"
 
@@ -66,3 +77,14 @@ arguments:
     valueFrom: $(inputs.intervallist)
   - prefix: "-O"
     valueFrom: $(inputs.sample)_recal_data.table
     valueFrom: $(inputs.intervallist)
   - prefix: "-O"
     valueFrom: $(inputs.sample)_recal_data.table
+
+s:codeRepository: https://github.com/arvados/arvados-tutorial
+s:license: https://www.gnu.org/licenses/agpl-3.0.en.html
+
+$namespaces:
+ s: https://schema.org/
+ edam: http://edamontology.org/
+
+$schemas:
+ - https://schema.org/version/latest/schema.rdf
+ - http://edamontology.org/EDAM_1.18.owl
index e1e54eeef1d39b613f235b7fc4410a9c6414f205..ef902099c066015699866acad47b4129da3ffcc1 100644 (file)
@@ -8,10 +8,14 @@ requirements:
 inputs:
   bam:
     type: File
 inputs:
   bam:
     type: File
+    format: edam:format_2572 # BAM
+    label: Indexed sorted BAM with labeled duplicates
     secondaryFiles:
       - .bai
   reference:
     type: File
     secondaryFiles:
       - .bai
   reference:
     type: File
+    format: edam:format_1929 # FASTA
+    label: Reference genome
     secondaryFiles:
       - .amb
       - .ann
     secondaryFiles:
       - .amb
       - .ann
@@ -20,17 +24,24 @@ inputs:
       - .sa
       - .fai
       - ^.dict
       - .sa
       - .fai
       - ^.dict
-  sample: string
+  sample: 
+    type: string
+    label: Sample Name
   knownsites1:
     type: File
   knownsites1:
     type: File
+    format: edam:format_3016 # VCF
+    label: VCF of known polymorphic sites for BQSR
     secondaryFiles:
       - .tbi
   intervallist:
     type: File
     secondaryFiles:
       - .tbi
   intervallist:
     type: File
+    label: Scatter intervals file 
 
 outputs:
   gvcf:
     type: File
 
 outputs:
   gvcf:
     type: File
+    format: edam:format_3016 # GVCF
+    label: single interval filtered GVCF
     outputSource: selectvariants/filteredgvcf
 
 steps:
     outputSource: selectvariants/filteredgvcf
 
 steps: