From: Sarah Wait Zaranek Date: Wed, 17 Jun 2020 17:13:54 +0000 (+0000) Subject: Add scatter wf for overall scatter X-Git-Url: https://git.arvados.org/arvados-tutorial.git/commitdiff_plain/f8190b9075da8df466cd1faa59840619e9233b7c Add scatter wf for overall scatter Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek no issue # --- diff --git a/WGS-processing/cwl/getfastq.cwl b/WGS-processing/cwl/getfastq.cwl deleted file mode 100644 index a38bc91..0000000 --- a/WGS-processing/cwl/getfastq.cwl +++ /dev/null @@ -1,49 +0,0 @@ -$namespaces: - arv: "http://arvados.org/cwl#" - cwltool: "http://commonwl.org/cwltool#" -class: ExpressionTool -cwlVersion: v1.1 -label: Create array of gvcfs to process -requirements: - InlineJavascriptRequirement: {} -inputs: - fastjdir: - type: Directory - label: Input directory of fastj - loadListing: 'shallow_listing' -outputs: - fastq1: - type: File[] - fastq2: - type: File[] -expression: | - ${function compare(a, b) { - var baseA = a.basename; - var baseB = b.basename; - - var comparison = 0; - if (baseA > baseB) { - comparison = 1; - } else if (baseA < baseB) { - comparison = -1; - } - return comparison; - } - - var fastq1 = []; - var fastq2 = []; - for (var i = 0; i < inputs.fastjdir.listing.length; i++) { - var name = inputs.fastjdir.listing[i]; - if (name.basename.indexOf('_1.fastq.gz') != -1 ) { - fastq1.push(name); - } - if (name.basename.indexOf('_2.fastq.gz') != -1 ) { - fastq2.push(name); - } - } - - fastq1 = fastq1.sort(compare) - fastq2 = fastq2.sort(compare) - - return {"fastq1": fastq1, "fastq2": fastq2}; - } diff --git a/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp b/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp new file mode 100644 index 0000000..6973e8c Binary files /dev/null and b/WGS-processing/cwl/helper/.scatter-gatk-wf-with-interval.cwl.swp differ diff --git a/WGS-processing/cwl/helper/bwa-gatk-report-wf.cwl b/WGS-processing/cwl/helper/bwamem-gatk-report-wf.cwl similarity index 100% rename from WGS-processing/cwl/helper/bwa-gatk-report-wf.cwl rename to WGS-processing/cwl/helper/bwamem-gatk-report-wf.cwl diff --git a/WGS-processing/cwl/helper/getgvcfs.cwl b/WGS-processing/cwl/helper/not-in-use/getgvcfs.cwl similarity index 100% rename from WGS-processing/cwl/helper/getgvcfs.cwl rename to WGS-processing/cwl/helper/not-in-use/getgvcfs.cwl diff --git a/WGS-processing/cwl/wgs-processing-wf.cwl b/WGS-processing/cwl/wgs-processing-wf.cwl index 0a4b9d2..f7d2d00 100644 --- a/WGS-processing/cwl/wgs-processing-wf.cwl +++ b/WGS-processing/cwl/wgs-processing-wf.cwl @@ -3,10 +3,10 @@ class: Workflow requirements: - class: SubworkflowFeatureRequirement + - class: ScatterFeatureRequirement inputs: - fastq1: File - fastq2: File + fastqdir: Directory reference: type: File secondaryFiles: @@ -29,65 +29,33 @@ inputs: tailhtml: File outputs: - qc-html: - type: File[] - outputSource: fastqc/out-html - qc-zip: - type: File[] - outputSource: fastqc/out-zip gvcf: - type: File - outputSource: haplotypecaller/gatheredgvcf + type: File[] + outputSource: bwamem-gatk-report/gvcf report: - type: File - outputSource: generate-report/report + type: File[] + outputSource: bwamem-gatk-report/report + steps: - fastqc: - run: fastqc.cwl - in: - fastq1: fastq1 - fastq2: fastq2 - out: [out-html, out-zip] - bwamem-samtools-view: - run: bwamem-samtools-view.cwl - in: - fastq1: fastq1 - fastq2: fastq2 - reference: reference - sample: sample - out: [bam] - samtools-sort: - run: samtools-sort.cwl - in: - bam: bwamem-samtools-view/bam - sample: sample - out: [sortedbam] - mark-duplicates: - run: mark-duplicates.cwl + getfastq: + run: ./helper/getfastq.cwl in: - bam: samtools-sort/sortedbam - out: [dupbam,dupmetrics] - samtools-index: - run: samtools-index.cwl - in: - bam: mark-duplicates/dupbam - out: [indexedbam] - haplotypecaller: - run: scatter-gatk-wf-with-interval.cwl + fastqdir: fastqdir + out: [fastq1, fastq2] + + bwamem-gatk-report: + run: ./helper/bwamem-gatk-report-wf.cwl + scatter: [fastq1, fastq2] + scatterMethod: dotproduct in: + fastq1: getfastq/fastq1 + fastq2: getfastq/fastq2 reference: reference - bam: samtools-index/indexedbam sample: sample + knownsites: knownsites scattercount: scattercount - knownsites1: knownsites - out: [gatheredgvcf] - generate-report: - run: report-wf.cwl - in: - gvcf: haplotypecaller/gatheredgvcf - samplename: sample clinvarvcf: clinvarvcf reportfunc: reportfunc headhtml: headhtml tailhtml: tailhtml - out: [report] + out: [qc-html,qc-zip,gvcf,report]