Add cwl and docker files
author Jiayong Li <jli@curii.com>
Fri, 9 Dec 2022 21:03:13 +0000 (21:03 +0000)
committer Jiayong Li <jli@curii.com>
Fri, 9 Dec 2022 21:03:13 +0000 (21:03 +0000)
refs #19785

Arvados-DCO-1.1-Signed-off-by: Jiayong Li <jli@curii.com>

206 files changed:
.licenseignore
cwl/annotation/annotate-wf.cwl [new file with mode: 0644]
cwl/annotation/bcftools-concat.cwl [new file with mode: 0644]
cwl/annotation/getcount.cwl [new file with mode: 0644]
cwl/annotation/getfiles.cwl [new file with mode: 0644]
cwl/annotation/preprocess.cwl [new file with mode: 0644]
cwl/annotation/snpeff-bcftools-annotate.cwl [new file with mode: 0644]
cwl/annotation/src/getcount.sh [new file with mode: 0755]
cwl/annotation/src/totalcounts.sh [new file with mode: 0755]
cwl/annotation/totalcounts.cwl [new file with mode: 0644]
cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/annotation/yml/preprocess-test.yml [new file with mode: 0644]
cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml [new file with mode: 0644]
cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml [new file with mode: 0644]
cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml [new file with mode: 0644]
cwl/comparevcf/change-GT.cwl [new file with mode: 0644]
cwl/comparevcf/comparevcf-original-lightning-wf.cwl [new file with mode: 0644]
cwl/comparevcf/rtg-vcfeval.cwl [new file with mode: 0644]
cwl/comparevcf/src/change-GT.sh [new file with mode: 0755]
cwl/comparevcf/src/concatenate.sh [new file with mode: 0755]
cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/append-sampleid.cwl [new file with mode: 0644]
cwl/gvcf2fasta/bcftools-consensus.cwl [new file with mode: 0644]
cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/get_bed_varonlyvcf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/getfiles.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/src/bcftools-consensus.sh [new file with mode: 0755]
cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh [new file with mode: 0755]
cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh [new file with mode: 0755]
cwl/gvcf2fasta/src/untar-concat.sh [new file with mode: 0755]
cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl [new file with mode: 0644]
cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/bcftools-consensus-test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml [new file with mode: 0644]
cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml [new file with mode: 0644]
cwl/imputation/bcftools-concat.cwl [new file with mode: 0644]
cwl/imputation/beagle.cwl [new file with mode: 0644]
cwl/imputation/bedtools-intersect.cwl [new file with mode: 0644]
cwl/imputation/bedtools-subtract.cwl [new file with mode: 0644]
cwl/imputation/get-imputedvcf.cwl [new file with mode: 0644]
cwl/imputation/get-phasedvcf.cwl [new file with mode: 0644]
cwl/imputation/imputation-wf.cwl [new file with mode: 0644]
cwl/imputation/match-ref-map-chr.cwl [new file with mode: 0644]
cwl/imputation/merge-phased-imputed-wf.cwl [new file with mode: 0644]
cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl [new file with mode: 0644]
cwl/imputation/rtg-vcffilter.cwl [new file with mode: 0644]
cwl/imputation/rtg-vcfmerge.cwl [new file with mode: 0644]
cwl/imputation/scatter-beagle-wf.cwl [new file with mode: 0644]
cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/imputation/yml/rtg-vcffilter-test.yml [new file with mode: 0644]
cwl/lightning/batch-dirs.cwl [new file with mode: 0644]
cwl/lightning/fasta2numpy-multi-wf.cwl [new file with mode: 0644]
cwl/lightning/fasta2numpy-wf.cwl [new file with mode: 0644]
cwl/lightning/genreadme.cwl [new file with mode: 0644]
cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl [new file with mode: 0644]
cwl/lightning/libray2numpy-wf.cwl [new file with mode: 0644]
cwl/lightning/lightning-anno2vcf.cwl [new file with mode: 0644]
cwl/lightning/lightning-choose-samples.cwl [new file with mode: 0644]
cwl/lightning/lightning-import.cwl [new file with mode: 0644]
cwl/lightning/lightning-plot.cwl [new file with mode: 0644]
cwl/lightning/lightning-slice-numpy-anno2vcf-wf.cwl [new file with mode: 0644]
cwl/lightning/lightning-slice-numpy-onehot.cwl [new file with mode: 0644]
cwl/lightning/lightning-slice-numpy-pca.cwl [new file with mode: 0644]
cwl/lightning/lightning-slice-numpy.cwl [new file with mode: 0644]
cwl/lightning/lightning-slice.cwl [new file with mode: 0644]
cwl/lightning/lightning-tiling-stats.cwl [new file with mode: 0644]
cwl/lightning/make-arrays.cwl [new file with mode: 0644]
cwl/lightning/make-fastadirs.cwl [new file with mode: 0644]
cwl/lightning/make-libname.cwl [new file with mode: 0644]
cwl/lightning/readme.md [new file with mode: 0644]
cwl/lightning/src/genreadme.py [new file with mode: 0755]
cwl/lightning/stage-output.cwl [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-public.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-test.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml [new file with mode: 0644]
cwl/lightning/yml/fasta2numpy-wf-100test.yml [new file with mode: 0644]
cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-export-numpy-merged.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-import-ref37.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-import-ref38.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-import-testdata.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-merge-testdata_ref38.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml [new file with mode: 0644]
cwl/lightning/yml/lightning-tiling-stats-ref37.yml [new file with mode: 0644]
cwl/preprocess/cgivar/bedtools-intersect.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/cgatools-mkvcf.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/cgatools-mkvcf.sh [new file with mode: 0755]
cwl/preprocess/cgivar/cgivar2vcfbed-wf.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/fix_vcf.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/fix_vcf.py [new file with mode: 0755]
cwl/preprocess/cgivar/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/gvcf_regions.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/scatter-cgivar2vcfbed-wf.cwl [new file with mode: 0644]
cwl/preprocess/cgivar/yml/cgivar2vcfbed-wf-test.yml [new file with mode: 0644]
cwl/preprocess/cgivar/yml/scatter-cgivar2vcfbed-wf-367set.yml [new file with mode: 0644]
cwl/preprocess/chrmvcf/change_gt_M.js [new file with mode: 0644]
cwl/preprocess/chrmvcf/change_gt_chrM.js [new file with mode: 0644]
cwl/preprocess/chrmvcf/fixchrm-wf.cwl [new file with mode: 0644]
cwl/preprocess/chrmvcf/fixchrm.cwl [new file with mode: 0644]
cwl/preprocess/chrmvcf/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/chrmvcf/yml/fixchrm-test.yml [new file with mode: 0644]
cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml [new file with mode: 0644]
cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml [new file with mode: 0644]
cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml [new file with mode: 0644]
cwl/preprocess/gvcf/filtercleangvcf-wf.cwl [new file with mode: 0644]
cwl/preprocess/gvcf/filtercleangvcf.cwl [new file with mode: 0644]
cwl/preprocess/gvcf/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/gvcf/src/cleanvcf.py [new file with mode: 0755]
cwl/preprocess/gvcf/src/filter-gvcf [new file with mode: 0755]
cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml [new file with mode: 0644]
cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml [new file with mode: 0644]
cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml [new file with mode: 0644]
cwl/preprocess/gvcf/yml/keepGQdot-test.yml [new file with mode: 0644]
cwl/preprocess/haploidvcf/change_gt.js [new file with mode: 0644]
cwl/preprocess/haploidvcf/fixgt-wf.cwl [new file with mode: 0644]
cwl/preprocess/haploidvcf/fixgt.cwl [new file with mode: 0644]
cwl/preprocess/haploidvcf/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/haploidvcf/yml/fixgt-test.yml [new file with mode: 0644]
cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml [new file with mode: 0644]
cwl/preprocess/nonrefvcf/fixnonref-wf.cwl [new file with mode: 0644]
cwl/preprocess/nonrefvcf/fixnonref.cwl [new file with mode: 0644]
cwl/preprocess/nonrefvcf/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml [new file with mode: 0644]
cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml [new file with mode: 0644]
cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml [new file with mode: 0644]
cwl/preprocess/portablevcf/bcftools-annotate.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/bcftools-reheader.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/cat.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/header [new file with mode: 0644]
cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/rtg-vcfeval.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/sort-clean.cwl [new file with mode: 0644]
cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml [new file with mode: 0644]
cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml [new file with mode: 0644]
cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml [new file with mode: 0644]
cwl/preprocess/portablevcf/yml/sort-clean.cwl [new file with mode: 0644]
cwl/preprocess/simons/filter-vcf.cwl [new file with mode: 0644]
cwl/preprocess/simons/getfiles.cwl [new file with mode: 0644]
cwl/preprocess/simons/make-bed.cwl [new file with mode: 0644]
cwl/preprocess/simons/make-vcf-bed-wf.cwl [new file with mode: 0644]
cwl/preprocess/simons/yml/filter-vcf-test.yml [new file with mode: 0644]
cwl/preprocess/simons/yml/make-bed-test.yml [new file with mode: 0644]
cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml [new file with mode: 0644]
cwl/preprocess/splitvcf/concatvcf-wf.cwl [new file with mode: 0644]
cwl/preprocess/splitvcf/concatvcf.cwl [new file with mode: 0644]
cwl/preprocess/splitvcf/src/concatvcf.sh [new file with mode: 0755]
cwl/preprocess/splitvcf/yml/concatvcf-test.yml [new file with mode: 0644]
cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml [new file with mode: 0644]
cwl/preprocess/vcfbed/get-vcfbed.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/intersect-vcfbed.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/sort-bed.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/sort-vcf.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf [new file with mode: 0755]
cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/vcfbed2gvcf.cwl [new file with mode: 0644]
cwl/preprocess/vcfbed/yml/single-bed.yml [new file with mode: 0644]
cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml [new file with mode: 0644]
cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml [new file with mode: 0644]
cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml [new file with mode: 0644]
docker/beagle5.4/Dockerfile [new file with mode: 0644]
docker/cgivar2vcfbed/Dockerfile [new file with mode: 0644]
docker/lightning/Dockerfile [new file with mode: 0644]
docker/snpeff/Dockerfile [new file with mode: 0644]
docker/vcfutil/Dockerfile [new file with mode: 0644]

index acc2c05c1e5e2f5f3af28398ea3ee543a574089c..0d77b26cc448356494453a417b0bbfa4c6d3531e 100644 (file)
@@ -3,3 +3,4 @@ AUTHORS
 go.mod
 go.sum
 testdata/*
+readme.md
diff --git a/cwl/annotation/annotate-wf.cwl b/cwl/annotation/annotate-wf.cwl
new file mode 100644 (file)
index 0000000..0e6971b
--- /dev/null
@@ -0,0 +1,83 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow  # per-chromosome VCF annotation: select files, filter, annotate, concatenate, summarise
+requirements:
+  ScatterFeatureRequirement: {}  # required by the scattered steps below
+
+inputs:
+  sample: string  # base name; per-chromosome names are built as "<sample>.<chr>"
+  chrs: string[]  # chromosome labels used to pick files out of vcfdir and gnomaddir
+  vcfdir: Directory
+  snpeffdatadir: Directory
+  genomeversion: string
+  dbsnp:
+    type: File
+    secondaryFiles: [.csi]
+  gnomaddir: Directory
+
+outputs:
+  annotatedvcf:  # single VCF concatenated from the per-chromosome annotated VCFs
+    type: File
+    secondaryFiles: [.tbi]
+    outputSource: bcftools-concat/vcf
+  summary:  # the per-chromosome count lines followed by one overall line
+    type: File
+    outputSource: totalcounts/summary
+
+steps:
+  getfiles:  # turns sample/chrs into parallel arrays: names, VCFs and gnomAD files
+    run: getfiles.cwl
+    in:
+      sample: sample
+      chrs: chrs
+      vcfdir: vcfdir
+      gnomaddir: gnomaddir
+    out: [samples, vcfs, gnomads]
+
+  preprocess:  # keeps header lines and records whose 4th and 5th columns differ; output is bgzip-compressed
+    run: preprocess.cwl
+    scatter: [sample, vcf]
+    scatterMethod: dotproduct  # pairs the i-th name with the i-th VCF
+    in:
+      sample: getfiles/samples
+      vcf: getfiles/vcfs
+    out: [trimmedvcf]
+
+  snpeff-bcftools-annotate:  # SnpEff, then dbSNP and gnomAD annotation, once per chromosome
+    run: snpeff-bcftools-annotate.cwl
+    scatter: [sample, vcf, gnomad]
+    scatterMethod: dotproduct  # the three arrays are walked in lock-step
+    in:
+      vcf: preprocess/trimmedvcf
+      sample: getfiles/samples
+      snpeffdatadir: snpeffdatadir
+      genomeversion: genomeversion
+      dbsnp: dbsnp
+      gnomad: getfiles/gnomads
+    out: [annotatedvcf]
+
+  bcftools-concat:  # not scattered: receives the whole array of annotated VCFs
+    run: bcftools-concat.cwl
+    in:
+      sample: sample
+      vcfs: snpeff-bcftools-annotate/annotatedvcf
+    out: [vcf]
+
+  getcount:  # one summary line per chromosome
+    run: getcount.cwl
+    scatter: [sample, vcf]
+    scatterMethod: dotproduct
+    in:
+      sample: getfiles/samples
+      vcf: snpeff-bcftools-annotate/annotatedvcf
+    out: [count]
+
+  totalcounts:  # sums the per-chromosome counts into the overall summary
+    run: totalcounts.cwl
+    in:
+      sample: sample
+      counts: getcount/count
+    out: [summary]
diff --git a/cwl/annotation/bcftools-concat.cwl b/cwl/annotation/bcftools-concat.cwl
new file mode 100644 (file)
index 0000000..fe3b78b
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: CommandLineTool  # bcftools concat of the inputs into "<sample>_snpeff_dbsnp_gnomad.vcf.gz", then tabix on the result
+requirements:
+  ShellCommandRequirement: {}  # needed for the unquoted "&&" that chains tabix after bcftools
+hints:
+  DockerRequirement:
+    dockerPull: snpeff4.3t
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 10000
+  arv:RuntimeConstraints:  # Arvados extension hint, resolved through the arv namespace declared above
+    keep_cache: 20000
+    outputDirType: keep_output_dir
+inputs:
+  sample: string  # prefix of the output file name
+  vcfs:
+    type: File[]
+    secondaryFiles: [.tbi]
+outputs:
+  vcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"  # matches the file named by -o below
+    secondaryFiles: [.tbi]  # expected to be produced by the trailing tabix command
+baseCommand: [bcftools, concat]
+arguments:
+  - $(inputs.vcfs)
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - shellQuote: false
+    valueFrom: "&&"  # emitted unquoted so the shell treats it as an operator
+  - "tabix"
+  - $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
diff --git a/cwl/annotation/getcount.cwl b/cwl/annotation/getcount.cwl
new file mode 100644 (file)
index 0000000..f243774
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # runs src/getcount.sh on one VCF and captures its one-line summary
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # label printed in the summary and used to name the output file
+  vcf: File
+  bashscript:  # the script to execute; defaults to the copy under src/
+    type: File
+    default:
+      class: File
+      location: src/getcount.sh
+outputs:
+  count:
+    type: stdout  # the captured standard output, i.e. the file named by `stdout` below
+arguments:  # no baseCommand: the script path itself is the first word of the command line
+  - $(inputs.bashscript)
+  - $(inputs.sample)
+  - $(inputs.vcf)
+stdout: $(inputs.sample).txt
diff --git a/cwl/annotation/getfiles.cwl b/cwl/annotation/getfiles.cwl
new file mode 100644 (file)
index 0000000..da14c89
--- /dev/null
@@ -0,0 +1,88 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Select the per-chromosome inputs for the annotation workflow: for every
+# entry of `chrs`, emit the name "<sample>.<chr>", the file in `vcfdir` and the
+# file in `gnomaddir` whose basename contains ".<chr>.", attaching the gnomAD
+# file's "<basename>.csi" index as a secondary file when it is present.
+# Evaluation fails with an explicit error if a chromosome has no matching file.
+cwlVersion: v1.1
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  sample: string
+  chrs: string[]
+  vcfdir: Directory
+  gnomaddir: Directory
+outputs:
+  samples: string[]
+  vcfs: File[]
+  gnomads:
+    type: File[]
+    secondaryFiles: [.csi]
+expression: |
+  ${
+    // Index files sit next to the data files and share the ".<chr>." tag, so
+    // they must never be picked as the data file itself.
+    function isIndex(name) {
+      var suffix = name.slice(-4);
+      return suffix === ".csi" || suffix === ".tbi";
+    }
+
+    // First non-index entry of `listing` whose basename contains `tag`, or null.
+    function findTagged(listing, tag) {
+      for (var k = 0; k < listing.length; k++) {
+        var name = listing[k].basename;
+        if (name.indexOf(tag) !== -1 && !isIndex(name)) {
+          return listing[k];
+        }
+      }
+      return null;
+    }
+
+    // Entry of `listing` whose basename is exactly `wanted`, or null.
+    function findNamed(listing, wanted) {
+      for (var k = 0; k < listing.length; k++) {
+        if (listing[k].basename === wanted) {
+          return listing[k];
+        }
+      }
+      return null;
+    }
+
+    var samples = [];
+    var vcfs = [];
+    var gnomads = [];
+
+    for (var i = 0; i < inputs.chrs.length; i++) {
+      var chr = inputs.chrs[i];
+      // The surrounding dots stop "chr1" from matching "chr10", "chr11", ...
+      var tag = "." + chr + ".";
+
+      // Every lookup starts from scratch: a chromosome without a match must not
+      // silently reuse the file found for the previous chromosome.
+      var vcf = findTagged(inputs.vcfdir.listing, tag);
+      if (vcf === null) {
+        throw new Error("getfiles: no VCF matching '" + tag + "' in vcfdir");
+      }
+      var gnomad = findTagged(inputs.gnomaddir.listing, tag);
+      if (gnomad === null) {
+        throw new Error("getfiles: no gnomAD file matching '" + tag + "' in gnomaddir");
+      }
+      var csi = findNamed(inputs.gnomaddir.listing, gnomad.basename + ".csi");
+      if (csi !== null) {
+        gnomad.secondaryFiles = [csi];
+      }
+
+      samples.push(inputs.sample + "." + chr);
+      vcfs.push(vcf);
+      gnomads.push(gnomad);
+    }
+
+    return {"samples": samples, "vcfs": vcfs, "gnomads": gnomads};
+  }
diff --git a/cwl/annotation/preprocess.cwl b/cwl/annotation/preprocess.cwl
new file mode 100644 (file)
index 0000000..b32f93a
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # drops non-header records whose 4th and 5th columns are equal; writes "<sample>.vcf.gz"
+requirements:
+  ShellCommandRequirement: {}  # needed for the unquoted "|" and ">" arguments below
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # prefix of the output file name
+  vcf: File  # read directly by awk
+outputs:
+  trimmedvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"  # matches the file written by the redirect below
+baseCommand: awk
+arguments:
+  - '{if ($1 ~ /^#/ || $4 != $5) print $0}'
+  - $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: "|"  # pipe awk's output into bgzip
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"  # redirect bgzip's stdout into the output file
+  - $(inputs.sample).vcf.gz
diff --git a/cwl/annotation/snpeff-bcftools-annotate.cwl b/cwl/annotation/snpeff-bcftools-annotate.cwl
new file mode 100644 (file)
index 0000000..d7b1e81
--- /dev/null
@@ -0,0 +1,103 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Run SnpEff on given VCF and use bcftools to annotate with dbSNP and gnomAD
+requirements:
+  ShellCommandRequirement: {}  # the arguments below form one shell command line joined by unquoted "|", ">" and "&&"
+hints:
+  DockerRequirement:
+    dockerPull: snpeff4.3t
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 20000
+    tmpdirMin: 16000
+inputs:
+  vcf:
+    type: File
+    label: Input VCF
+  sample:
+    type: string
+    label: Sample name
+  snpeffdatadir:
+    type: Directory
+    label: Database directory for SnpEff
+  genomeversion:
+    type: string
+    label: Genome version
+  dbsnp:
+    type: File
+    label: dbSNP BCF
+    secondaryFiles: [.csi]
+  gnomad:
+    type: File
+    label: gnomAD BCF
+    secondaryFiles: [.csi]
+outputs:
+  annotatedvcf:
+    type: File
+    label: Annotated VCF
+    outputBinding:
+      glob: "*_snpeff_dbsnp_gnomad.vcf.gz"  # only the final stage's file has this suffix
+    secondaryFiles: [.tbi]
+baseCommand: [java]
+arguments:
+  - -Xmx$(runtime.ram)m  # NOTE(review): heap cap equals the job's whole RAM allocation, leaving no headroom for JVM overhead or bgzip -- confirm intended
+  - prefix: "-jar"
+    valueFrom: "/snpEff/snpEff.jar"
+  - prefix: "-dataDir"
+    valueFrom: $(inputs.snpeffdatadir)
+  - $(inputs.genomeversion)
+  - $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: "|"  # stage 1: SnpEff output is piped into bgzip
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sample)_snpeff.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"  # index the stage 1 file before bcftools reads it
+  - $(inputs.sample)_snpeff.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bcftools"  # stage 2: take the ID column from the dbSNP file
+  - "annotate"
+  - prefix: "--annotations"
+    valueFrom: $(inputs.dbsnp)
+  - prefix: "--columns"
+    valueFrom: "=ID"  # NOTE(review): the "=" prefix should mean "append to existing values" -- confirm against bcftools annotate docs
+  - $(inputs.sample)_snpeff.vcf.gz
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"  # index the stage 2 file before the next bcftools run
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bcftools"  # stage 3: copy the listed INFO tags from the gnomAD file
+  - "annotate"
+  - prefix: "--annotations"
+    valueFrom: $(inputs.gnomad)
+  - prefix: "--columns"
+    valueFrom: "INFO/AC,INFO/AN,INFO/AF,INFO/AF_afr,INFO/AF_amr,INFO/AF_asj,INFO/AF_eas,INFO/AF_fin,INFO/AF_nfe,INFO/AF_oth"
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"  # index for the final file, declared as the output's secondary file
+  - $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "rm"  # clean-up: delete the stage 1 and stage 2 files and their indexes
+  - $(inputs.sample)_snpeff.vcf.gz
+  - $(inputs.sample)_snpeff.vcf.gz.tbi
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz.tbi
diff --git a/cwl/annotation/src/getcount.sh b/cwl/annotation/src/getcount.sh
new file mode 100755 (executable)
index 0000000..3641d69
--- /dev/null
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+set -o pipefail
+
+sample=$1
+vcf=$2
+
+total=`zcat $vcf | awk '!(/^#/)' | wc -l`
+rsid=`zcat $vcf | awk '(!(/^#/) && /rs/)' | wc -l`
+gnomad=`zcat $vcf | awk '(!(/^#/) && /AF/)' | wc -l`
+rsidpercentage=`awk -v n="$rsid" -v d="$total" 'BEGIN {print n/d*100}'`
+gnomadpercentage=`awk -v n="$gnomad" -v d="$total" 'BEGIN {print n/d*100}'`
+
+echo "$sample: $total total variants, $rsid variants ($rsidpercentage%) have rsID, $gnomad variants ($gnomadpercentage%) have gnomad AF"
diff --git a/cwl/annotation/src/totalcounts.sh b/cwl/annotation/src/totalcounts.sh
new file mode 100755 (executable)
index 0000000..ac9ed9f
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+set -o pipefail
+
+counts=( "$@" )
+
+export allrsid="0"
+export allgnomad="0"
+export alltotal="0"
+
+cat ${counts[@]}
+
+for count in ${counts[@]}; do
+  rsid=`cut -d' ' -f5 $count`
+  gnomad=`cut -d' ' -f10 $count`
+  total=`cut -d' ' -f2 $count`
+  allrsid=`echo $(($allrsid + $rsid))`
+  allgnomad=`echo $(($allgnomad + $gnomad))`
+  alltotal=`echo $(($alltotal + $total))`
+done
+rsidpercentage=`awk -v n="$allrsid" -v d="$alltotal" 'BEGIN {print n/d*100}'`
+gnomadpercentage=`awk -v n="$allgnomad" -v d="$alltotal" 'BEGIN {print n/d*100}'`
+
+echo "overall: $alltotal total variants, $allrsid variants ($rsidpercentage%) have rsID, $allgnomad variants ($gnomadpercentage%) have gnomad AF"
diff --git a/cwl/annotation/totalcounts.cwl b/cwl/annotation/totalcounts.cwl
new file mode 100644 (file)
index 0000000..2994a24
--- /dev/null
@@ -0,0 +1,26 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # runs src/totalcounts.sh over the count files and captures its output
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # used only to name the output file
+  counts: File[]  # passed to the script as its arguments
+  bashscript:  # the script to execute; defaults to the copy under src/
+    type: File
+    default:
+      class: File
+      location: src/totalcounts.sh
+outputs:
+  summary:
+    type: stdout  # the captured standard output, i.e. the file named by `stdout` below
+arguments:  # no baseCommand: the script path itself is the first word of the command line
+  - $(inputs.bashscript)
+  - $(inputs.counts)
+stdout: $(inputs.sample)_summary.txt
diff --git a/cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..b9e43a2
--- /dev/null
@@ -0,0 +1,43 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sample: "annotations"
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+vcfdir:
+  class: Directory
+  location: keep:72c196515d6c449abc1fdbf1ead2a33e+1779
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
diff --git a/cwl/annotation/yml/preprocess-test.yml b/cwl/annotation/yml/preprocess-test.yml
new file mode 100644 (file)
index 0000000..db34e6d
--- /dev/null
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sample: "annotations"
+vcf:
+  class: File
+  location: keep:3ba1b5c2ae01113baba4f9e69cd4b1bb+1566/annotations.chr1.vcf
diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml
new file mode 100644 (file)
index 0000000..b0c3683
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:42c61d54769fec3e54b73901c817c16c+10426/annotations.chr19.vcf
+sample: "annotations.chr19"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomad:
+  class: File
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135/gnomad.genomes.v3.1.2.sites.chr19.bcf
diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml
new file mode 100644 (file)
index 0000000..b0c3683
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:42c61d54769fec3e54b73901c817c16c+10426/annotations.chr19.vcf
+sample: "annotations.chr19"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomad:
+  class: File
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135/gnomad.genomes.v3.1.2.sites.chr19.bcf
diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml
new file mode 100644 (file)
index 0000000..d77903f
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:0f0a172c371fddeee3c3d1a500bbcf66+4438/annotations.chr22.vcf.gz
+sample: "annotations.chr22"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomad:
+  class: File
+  location: keep:3b02d9312ad6bc1dcf526d66ce02ed59+10572/gnomad.genomes.v3.1.2.sites.chr22.bcf
diff --git a/cwl/comparevcf/change-GT.cwl b/cwl/comparevcf/change-GT.cwl
new file mode 100644 (file)
index 0000000..b92666e
--- /dev/null
@@ -0,0 +1,29 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: CommandLineTool  # runs the supplied script as: <bashscript> <sampleid> <suffix> <vcf> <header>
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sampleid: string
+  suffix: string
+  vcf: File
+  header: File
+  bashscript: File  # required input: no default script location is declared here
+outputs:
+  modifiedvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"  # NOTE(review): relies on the script writing exactly one *vcf.gz into the output directory -- script not visible here
+    secondaryFiles: [.tbi]  # NOTE(review): presumably the script also writes the index -- confirm
+arguments:  # no baseCommand: the script path itself is the first word of the command line
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.suffix)
+  - $(inputs.vcf)
+  - $(inputs.header)
diff --git a/cwl/comparevcf/comparevcf-original-lightning-wf.cwl b/cwl/comparevcf/comparevcf-original-lightning-wf.cwl
new file mode 100644 (file)
index 0000000..4b38fba
--- /dev/null
@@ -0,0 +1,66 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: Workflow
+requirements:
+  StepInputExpressionRequirement: {}  # needed for the valueFrom on the "suffix" step inputs
+inputs:
+  sampleid: string
+  vcf: File  # filtered with nocallbed, then used as the vcfeval baseline
+  nocallbed: File  # passed to rtg-vcffilter as "excludebed"
+  lightningvcf: File  # used as the vcfeval calls after change-GT
+  sdf: Directory  # passed to rtg vcfeval as "sdf"
+  bashscript: File  # script run by both change-GT steps
+  header: File  # header file handed to both change-GT steps
+
+outputs:
+  modifiedoriginalvcf:
+    type: File
+    outputSource: change-GT_original/modifiedvcf
+  modifiedlightningvcf:
+    type: File
+    outputSource: change-GT_lightning/modifiedvcf
+  evaldir:
+    type: Directory
+    outputSource: rtg-vcfeval/evaldir
+
+steps:
+  rtg-vcffilter:  # tool lives in the sibling imputation directory
+    run: ../imputation/rtg-vcffilter.cwl
+    in:
+      sample: sampleid
+      vcf: vcf
+      excludebed: nocallbed
+    out: [filteredvcf]
+
+  change-GT_original:  # output is named <sampleid>_original.vcf.gz by the script
+    run: change-GT.cwl
+    in:
+      sampleid: sampleid
+      suffix:
+        valueFrom: "original"
+      vcf: rtg-vcffilter/filteredvcf
+      header: header
+      bashscript: bashscript
+    out: [modifiedvcf]
+
+  change-GT_lightning:  # output is named <sampleid>_lightning.vcf.gz by the script
+    run: change-GT.cwl
+    in:
+      sampleid: sampleid
+      suffix:
+        valueFrom: "lightning"
+      vcf: lightningvcf
+      header: header
+      bashscript: bashscript
+    out: [modifiedvcf]
+
+  rtg-vcfeval:  # compares the two rewritten VCFs
+    run: rtg-vcfeval.cwl
+    in:
+      baselinevcf: change-GT_original/modifiedvcf
+      callsvcf: change-GT_lightning/modifiedvcf
+      sdf: sdf
+    out: [evaldir]
diff --git a/cwl/comparevcf/rtg-vcfeval.cwl b/cwl/comparevcf/rtg-vcfeval.cwl
new file mode 100644 (file)
index 0000000..9f5dda3
--- /dev/null
@@ -0,0 +1,35 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # MiB
+inputs:
+  baselinevcf:  # passed as -b; needs its .tbi index alongside
+    type: File
+    secondaryFiles: [.tbi]
+  callsvcf:  # passed as -c; needs its .tbi index alongside
+    type: File
+    secondaryFiles: [.tbi]
+  sdf:  # passed as -t
+    type: Directory
+outputs:
+  evaldir:
+    type: Directory
+    outputBinding:
+      glob: "eval"  # must match the -o value below
+baseCommand: [rtg, vcfeval]
+arguments:
+  - prefix: "-b"
+    valueFrom: $(inputs.baselinevcf)
+  - prefix: "-c"
+    valueFrom: $(inputs.callsvcf)
+  - prefix: "-t"
+    valueFrom: $(inputs.sdf)
+  - prefix: "-o"
+    valueFrom: "eval"
diff --git a/cwl/comparevcf/src/change-GT.sh b/cwl/comparevcf/src/change-GT.sh
new file mode 100755 (executable)
index 0000000..6dfad07
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Rewrite a bgzipped VCF so that every record whose 4th and 5th columns
+# differ keeps its first 8 columns and gets a single "GT" FORMAT column with
+# value 0/1; prepend the given header and write <sampleid>_<suffix>.vcf.gz.
+#
+# Usage: change-GT.sh <sampleid> <suffix> <vcf.gz> <header>
+
+set -eo pipefail
+
+sampleid="$1"
+suffix="$2"
+vcf="$3"
+header="$4"
+out="${sampleid}_${suffix}.vcf.gz"
+
+# A brace group (rather than a process substitution) keeps the decompression
+# inside the pipeline, so "pipefail" notices when bgzip -dc fails.
+{
+  cat "$header"
+  bgzip -dc "$vcf" | awk '!/^#/ && $4 != $5 { print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 "\t" $7 "\t" $8 "\tGT\t0/1" }'
+} | bgzip -c > "$out"
+tabix -p vcf "$out"
diff --git a/cwl/comparevcf/src/concatenate.sh b/cwl/comparevcf/src/concatenate.sh
new file mode 100755 (executable)
index 0000000..94ae743
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Print the non-header lines of the per-chromosome files found in a
+# directory, in the order 1..22, X, Y, M (files are matched as *.chr<N>.*).
+#
+# Usage: concatenate.sh <lightningvcfdir>
+
+set -e
+set -o pipefail
+shopt -s nullglob
+
+lightningvcfdir="$1"
+
+# Build a real array so that "${chrs[@]}" does not rely on word splitting.
+chrs=($(seq 22) X Y M)
+
+for chr in "${chrs[@]}"; do
+  vcfs=("$lightningvcfdir"/*.chr"$chr".*)
+  if [ "${#vcfs[@]}" -eq 0 ]; then
+    echo "no file matching *.chr$chr.* in $lightningvcfdir" >&2
+    exit 1
+  fi
+  # awk instead of "grep -v": grep exits 1 when every line is a header line,
+  # which would abort the script under "set -e".
+  awk '!/^#/' "${vcfs[@]}"
+done
diff --git a/cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..e26b70e
--- /dev/null
@@ -0,0 +1,23 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-PRHS-PR000971-BL-COL-47620BL1"
+vcf:  # variant-only VCF of the sample
+  class: File
+  location: keep:ebd528706c3192b77e56ebe93361194e+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz
+nocallbed:  # from the same collection as the VCF above
+  class: File
+  location: keep:ebd528706c3192b77e56ebe93361194e+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed
+lightningvcf:
+  class: File
+  location: keep:4626bd8ca6b3ed8f1ef32dfd46815505+378/GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz
+sdf:
+  class: Directory
+  location: keep:88c64c60daf829f30187fbcb423a46fc+1355/hg38
+bashscript:  # relative to this job file's directory
+  class: File
+  location: ../src/change-GT.sh
+header:
+  class: File
+  location: keep:73394b577d9109cd266f5facfbd1e000+54/header
diff --git a/cwl/gvcf2fasta/append-sampleid.cwl b/cwl/gvcf2fasta/append-sampleid.cwl
new file mode 100644 (file)
index 0000000..f8d8193
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  sampleid: string
+  suffix: string
+outputs:
+  appendedsampleid: string
+expression: |
+  ${
+    // Plain string concatenation: <sampleid><suffix>.
+    return {"appendedsampleid": inputs.sampleid + inputs.suffix};
+  }
diff --git a/cwl/gvcf2fasta/bcftools-consensus.cwl b/cwl/gvcf2fasta/bcftools-consensus.cwl
new file mode 100644 (file)
index 0000000..8362b9e
--- /dev/null
@@ -0,0 +1,47 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Convert VCF to FASTA with bcftools consensus
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # MiB
+inputs:
+  sampleid:
+    type: string
+    label: sample ID
+  vcf:
+    type: File
+    label: Input VCF
+    secondaryFiles: [.tbi]
+  ref:
+    type: File
+    label: Reference FASTA
+  mask:
+    type: File
+    label: Mask BED region where FASTA sequence is filled with 'N'
+  bashscript:
+    type: File
+    label: Script to run bcftools consensus
+    default:
+      class: File
+      location: src/bcftools-consensus.sh
+outputs:
+  fas:
+    type: File[]
+    label: Output FASTAs
+    outputBinding:
+      glob: "*fa.gz"
+# Run the script through bash explicitly so that execution does not depend on
+# the staged file's executable bit or on its shebang line.
+baseCommand: bash
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.vcf)
+  - $(inputs.ref)
+  - $(inputs.mask)
diff --git a/cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl
new file mode 100644 (file)
index 0000000..5ddd3f3
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Concatenate and get no call BED and variant only VCF from gVCF
+requirements:
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # MiB
+    outdirMin: 40000  # MiB
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  splitvcfdir:
+    type: Directory
+    label: Input directory of split gVCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  bashscript:
+    type: File
+    label: Script to concatenate split gVCFs and get no call BED and variant only VCF
+    default:
+      class: File
+      location: src/concat-get_bed_varonlyvcf.sh
+outputs:
+  nocallbed:
+    type: File
+    label: No call BED of gVCF
+    outputBinding:
+      glob: "*_nocall.bed"
+  varonlyvcf:
+    type: File
+    label: Variant only VCF
+    outputBinding:
+      glob: "*_varonly.vcf.gz"
+    secondaryFiles: [.tbi]
+# Run the script through bash explicitly so that execution does not depend on
+# the staged file's executable bit or on its shebang line.
+baseCommand: bash
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.splitvcfdir)
+  - $(inputs.gqcutoff)
+  - $(inputs.genomebed)
diff --git a/cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl
new file mode 100644 (file)
index 0000000..7098c09
--- /dev/null
@@ -0,0 +1,52 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Fix gVCF and get no call BED and variant only VCF from gVCF
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 15000  # MiB
+inputs:
+  bashscript:
+    type: File
+    label: Bash script
+    default:
+      class: File
+      location: src/fixvcf-get_bed_varonlyvcf.sh
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+outputs:
+  nocallbed:
+    type: File
+    label: No call BED of gVCF
+    outputBinding:
+      glob: "*_nocall.bed"
+  varonlyvcf:
+    type: File
+    label: Variant only VCF
+    outputBinding:
+      glob: "*_varonly.vcf.gz"
+    secondaryFiles: [.tbi]
+# Run the script through bash explicitly so that execution does not depend on
+# the staged file's executable bit or on its shebang line.
+baseCommand: bash
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.vcf)
+  - $(inputs.gqcutoff)
+  - $(inputs.genomebed)
diff --git a/cwl/gvcf2fasta/get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/get_bed_varonlyvcf.cwl
new file mode 100644 (file)
index 0000000..b1c9aa8
--- /dev/null
@@ -0,0 +1,80 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Get no call BED and variant only VCF from gVCF
+requirements:
+  ShellCommandRequirement: {}  # the arguments below form a shell command line (>, &&, |)
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # MiB
+    outdirMin: 40000  # MiB
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering  # passed to gvcf_regions.py as --min_GQ
+  genomebed:
+    type: File
+    label: Whole genome BED
+outputs:
+  nocallbed:
+    type: File
+    label: No call BED of gVCF
+    outputBinding:
+      glob: "*_nocall.bed"
+  varonlyvcf:
+    type: File
+    label: Variant only VCF
+    outputBinding:
+      glob: "*_varonly.vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: /gvcf_regions/gvcf_regions.py
+arguments:  # stage 1: gvcf_regions.py --min_GQ <gqcutoff> <vcf> > <sampleid>.bed
+  - prefix: "--min_GQ"
+    valueFrom: $(inputs.gqcutoff)
+  - $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid).bed
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bedtools"  # stage 2: bedtools subtract -a <genomebed> -b <sampleid>.bed > <sampleid>_nocall.bed
+  - "subtract"
+  - prefix: "-a"
+    valueFrom: $(inputs.genomebed)
+  - prefix: "-b"
+    valueFrom: $(inputs.sampleid).bed
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid)_nocall.bed
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bgzip"  # stage 3: bgzip -dc <vcf> | grep -v END= | bgzip -c > <sampleid>_varonly.vcf.gz
+  - "-dc"
+  - $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: "|"
+  - "grep"
+  - "-v"
+  - "END="  # drops every line containing "END="
+  - shellQuote: False
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid)_varonly.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"  # stage 4: index the variant-only VCF (the .tbi secondary file)
+  - $(inputs.sampleid)_varonly.vcf.gz
diff --git a/cwl/gvcf2fasta/getfiles.cwl b/cwl/gvcf2fasta/getfiles.cwl
new file mode 100644 (file)
index 0000000..3b9e6a6
--- /dev/null
@@ -0,0 +1,37 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: ExpressionTool
+label: Create list of VCFs and sample names
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfs:
+    type: File[]
+    label: Output VCFs
+  samples:
+    type: string[]
+    label: Sample names of VCFs
+expression: |
+  ${
+    // inputs.dir.listing is read here; the hint above requests a shallow listing.
+    // Keep only the entries whose extension is ".gz".
+    var gzfiles = inputs.dir.listing.filter(function(entry) {
+      return entry.nameext == ".gz";
+    });
+    // Sample name: basename with its last two dot-separated parts removed
+    // (e.g. "a.vcf.gz" -> "a").
+    var names = gzfiles.map(function(entry) {
+      return entry.basename.split(".").slice(0, -2).join(".");
+    });
+    return {"vcfs": gzfiles, "samples": names};
+  }
diff --git a/cwl/gvcf2fasta/gvcf2fasta-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta-wf.cwl
new file mode 100644 (file)
index 0000000..daaf172
--- /dev/null
@@ -0,0 +1,64 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  # Kept so existing job files stay valid; it is no longer wired to a step
+  # because bcftools-consensus.cwl has no "haplotype" input.
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  get_bed_varonlyvcf:
+    run: get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcf: vcf
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  # bcftools-consensus.cwl takes sampleid/vcf/ref/mask and returns all FASTAs
+  # in one File[] output named "fas", so the step is not scattered.
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: sampleid
+      vcf: get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      mask: get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl
new file mode 100644 (file)
index 0000000..1975458
--- /dev/null
@@ -0,0 +1,82 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Impute gVCF and convert to FASTA for gVCF with NON_REF
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  # Kept so existing job files stay valid; it is no longer wired to a step
+  # because bcftools-consensus.cwl has no "haplotype" input.
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+  chrs:
+    type: string[]
+  refsdir: Directory
+  mapsdir: Directory
+  panelnocallbed: File
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  fixvcf-get_bed_varonlyvcf:
+    run: fixvcf-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcf: vcf
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  # NOTE(review): gvcf2fasta_splitvcf-imputation-wf.cwl also passes panelcallbed
+  # and genomebed to this sub-workflow -- confirm whether they are required here.
+  imputation-wf:
+    run: ../imputation/imputation-wf.cwl
+    in:
+      sample: sampleid
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      vcf: fixvcf-get_bed_varonlyvcf/varonlyvcf
+      nocallbed: fixvcf-get_bed_varonlyvcf/nocallbed
+      panelnocallbed: panelnocallbed
+    out: [phasedimputedvcf, phasedimputednocallbed]
+
+  # bcftools-consensus.cwl takes sampleid/vcf/ref/mask and returns all FASTAs
+  # in one File[] output named "fas", so the step is not scattered.
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: sampleid
+      vcf: imputation-wf/phasedimputedvcf
+      ref: ref
+      mask: imputation-wf/phasedimputednocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl
new file mode 100644 (file)
index 0000000..5128e9b
--- /dev/null
@@ -0,0 +1,54 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA for gVCF with NON_REF
+requirements:
+  ScatterFeatureRequirement: {}  # NOTE(review): no step in this workflow scatters
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  fixvcf-get_bed_varonlyvcf:  # produces the no-call BED and the variant-only VCF
+    run: fixvcf-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcf: vcf
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  bcftools-consensus:  # the no-call BED is used as the FASTA mask
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: sampleid
+      vcf: fixvcf-get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      mask: fixvcf-get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl
new file mode 100644 (file)
index 0000000..08728ca
--- /dev/null
@@ -0,0 +1,82 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Impute gVCF and convert to FASTA for gVCF split by chromosome
+requirements:
+  SubworkflowFeatureRequirement: {}  # imputation-wf is itself a workflow
+  StepInputExpressionRequirement: {}  # needed for the valueFrom on append-sampleid's "suffix"
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  splitvcfdir:
+    type: Directory
+    label: Input directory of split gVCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  chrs: string[]  # forwarded unchanged to imputation-wf
+  refsdir: Directory  # forwarded unchanged to imputation-wf
+  mapsdir: Directory  # forwarded unchanged to imputation-wf
+  panelnocallbed: File  # forwarded unchanged to imputation-wf
+  panelcallbed: File  # forwarded unchanged to imputation-wf
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  concat-get_bed_varonlyvcf:
+    run: concat-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      splitvcfdir: splitvcfdir
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  imputation-wf:
+    run: ../imputation/imputation-wf.cwl
+    in:
+      sample: sampleid
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      vcf: concat-get_bed_varonlyvcf/varonlyvcf
+      nocallbed: concat-get_bed_varonlyvcf/nocallbed
+      panelnocallbed: panelnocallbed
+      panelcallbed: panelcallbed
+      genomebed: genomebed
+    out: [phasedimputedvcf, phasedimputednocallbed]
+
+  append-sampleid:  # yields <sampleid>_phased_imputed for naming the FASTAs
+    run: append-sampleid.cwl
+    in:
+      sampleid: sampleid
+      suffix:
+        valueFrom: "_phased_imputed"
+    out: [appendedsampleid]
+
+  bcftools-consensus:  # consumes the imputed VCF and imputed no-call BED
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: append-sampleid/appendedsampleid
+      vcf: imputation-wf/phasedimputedvcf
+      ref: ref
+      mask: imputation-wf/phasedimputednocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl
new file mode 100644 (file)
index 0000000..3e9d721
--- /dev/null
@@ -0,0 +1,56 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA for gVCF split by chromosome
+requirements:
+  ScatterFeatureRequirement: {}  # NOTE(review): no step in this workflow scatters
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # MiB
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  splitvcfdir:
+    type: Directory
+    label: Input directory of split gVCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  concat-get_bed_varonlyvcf:  # produces the no-call BED and the variant-only VCF
+    run: concat-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      splitvcfdir: splitvcfdir
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  bcftools-consensus:  # the no-call BED is used as the FASTA mask
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: sampleid
+      vcf: concat-get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      mask: concat-get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl
new file mode 100644 (file)
index 0000000..90722b6
--- /dev/null
@@ -0,0 +1,64 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA for gVCF tar split by chromosome
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcftar:
+    type: File
+    label: Input gVCF tar
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  # Kept so existing job files stay valid; it is no longer wired to a step
+  # because bcftools-consensus.cwl has no "haplotype" input.
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  untar-concat-get_bed_varonlyvcf:
+    run: untar-concat-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcftar: vcftar
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  # bcftools-consensus.cwl takes sampleid/vcf/ref/mask and returns all FASTAs
+  # in one File[] output named "fas", so the step is not scattered.
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    in:
+      sampleid: sampleid
+      vcf: untar-concat-get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      mask: untar-concat-get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl
new file mode 100644 (file)
index 0000000..8309aa3
--- /dev/null
@@ -0,0 +1,59 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to Convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  arv:IntermediateOutput:
+    outputTTL: 604800
+
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Input directory of VCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta-wf/fas
+
+steps:
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfsdir
+    out: [vcfs, samples]
+  gvcf2fasta-wf:
+    run: gvcf2fasta-wf.cwl
+    scatter: [sampleid, vcf]
+    scatterMethod: dotproduct
+    in:
+      sampleid: getfiles/samples
+      vcf: getfiles/vcfs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl
new file mode 100644 (file)
index 0000000..fcee4d5
--- /dev/null
@@ -0,0 +1,57 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to Convert gVCF to FASTA with NON_REF
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  arv:IntermediateOutput:
+    outputTTL: 604800  # seconds (= 7 days)
+
+inputs:
+  sampleids:  # paired element-wise with "vcfs" by the dotproduct scatter
+    type: string[]
+    label: Sample IDs
+  vcfs:  # paired element-wise with "sampleids" by the dotproduct scatter
+    type: File[]
+    label: Input VCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:  # one inner array of FASTAs per scattered sample
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_nonrefvcf-wf/fas
+
+steps:
+  gvcf2fasta_nonrefvcf-wf:
+    run: gvcf2fasta_nonrefvcf-wf.cwl
+    scatter: [sampleid, vcf]
+    scatterMethod: dotproduct
+    in:
+      sampleid: sampleids
+      vcf: vcfs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl
new file mode 100644 (file)
index 0000000..5fc067c
--- /dev/null
@@ -0,0 +1,65 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"  # NOTE(review): no arv: extension is used in this file
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to impute gVCF and convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleids:  # paired element-wise with "splitvcfdirs" by the dotproduct scatter
+    type: string[]
+    label: Sample IDs
+  splitvcfdirs:  # paired element-wise with "sampleids" by the dotproduct scatter
+    type: Directory[]
+    label: Input directory of split gVCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  chrs: string[]  # shared by every scattered run
+  refsdir: Directory  # shared by every scattered run
+  mapsdir: Directory  # shared by every scattered run
+  panelnocallbed: File  # shared by every scattered run
+  panelcallbed: File  # shared by every scattered run
+
+outputs:
+  fas:  # one inner array of FASTAs per scattered sample
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_splitvcf-imputation-wf/fas
+
+steps:
+  gvcf2fasta_splitvcf-imputation-wf:
+    run: gvcf2fasta_splitvcf-imputation-wf.cwl
+    scatter: [sampleid, splitvcfdir]
+    scatterMethod: dotproduct
+    in:
+      sampleid: sampleids
+      splitvcfdir: splitvcfdirs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      panelnocallbed: panelnocallbed
+      panelcallbed: panelcallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl
new file mode 100644 (file)
index 0000000..b50e269
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"  # NOTE(review): no arv: extension is used in this file
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleids:  # paired element-wise with "splitvcfdirs" by the dotproduct scatter
+    type: string[]
+    label: Sample IDs
+  splitvcfdirs:  # paired element-wise with "sampleids" by the dotproduct scatter
+    type: Directory[]
+    label: Input directory of split gVCFs
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:  # one inner array of FASTAs per scattered sample
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_splitvcf-wf/fas
+
+steps:
+  gvcf2fasta_splitvcf-wf:
+    run: gvcf2fasta_splitvcf-wf.cwl
+    scatter: [sampleid, splitvcfdir]
+    scatterMethod: dotproduct
+    in:
+      sampleid: sampleids
+      splitvcfdir: splitvcfdirs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl
new file mode 100644 (file)
index 0000000..0899a02
--- /dev/null
@@ -0,0 +1,57 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to Convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  arv:IntermediateOutput:
+    outputTTL: 604800  # seconds (= 7 days)
+
+inputs:
+  sampleids:  # paired element-wise with "vcftars" by the dotproduct scatter
+    type: string[]
+    label: Sample IDs
+  vcftars:  # paired element-wise with "sampleids" by the dotproduct scatter
+    type: File[]
+    label: Input VCF tars
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_splitvcftar-wf/fas
+
+steps:
+  gvcf2fasta_splitvcftar-wf:  # the tar variant is the one that accepts "vcftar"
+    run: gvcf2fasta_splitvcftar-wf.cwl
+    scatter: [sampleid, vcftar]
+    scatterMethod: dotproduct
+    in:
+      sampleid: sampleids
+      vcftar: vcftars
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/src/bcftools-consensus.sh b/cwl/gvcf2fasta/src/bcftools-consensus.sh
new file mode 100755 (executable)
index 0000000..531a7c6
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Build one bgzipped consensus FASTA per haplotype (1 and 2) from a VCF.
+#
+# Usage: bcftools-consensus.sh <sampleid> <vcf> <ref> <mask>
+# Writes <sampleid>.1.fa.gz and <sampleid>.2.fa.gz in the current directory.
+
+set -eo pipefail
+
+sampleid="$1"
+vcf="$2"
+ref="$3"
+mask="$4"
+
+haplotypes=(1 2)
+
+for haplotype in "${haplotypes[@]}"; do
+  bcftools consensus --fasta-ref "$ref" --haplotype "$haplotype" --mask "$mask" "$vcf" | bgzip -c > "$sampleid"."$haplotype".fa.gz
+done
diff --git a/cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh b/cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh
new file mode 100755 (executable)
index 0000000..43f1a91
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Concatenate per-chromosome gVCFs and derive, in the current directory:
+#   <sampleid>_nocall.bed      <genomebed> minus the regions reported by gvcf_regions.py
+#   <sampleid>_varonly.vcf.gz  lines whose 5th column is not ".", plus a tabix index
+#
+# Usage: concat-get_bed_varonlyvcf.sh <sampleid> <splitvcfdir> <gqcutoff> <genomebed>
+
+set -eo pipefail
+
+sampleid="$1"
+splitvcfdir="$2"
+gqcutoff="$3"
+genomebed="$4"
+
+chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM)
+
+# Collect the split gVCFs in chromosome order.  A chromosome without a
+# matching file is reported and skipped; having no file at all is an error.
+shopt -s nullglob
+splitvcfs=()
+for chrom in "${chroms[@]}"; do
+  matches=("$splitvcfdir"/*"$chrom".*gz)
+  if [ "${#matches[@]}" -eq 0 ]; then
+    echo "warning: no split gVCF for $chrom in $splitvcfdir" >&2
+  fi
+  splitvcfs+=("${matches[@]}")
+done
+if [ "${#splitvcfs[@]}" -eq 0 ]; then
+  echo "no split gVCFs found in $splitvcfdir" >&2
+  exit 1
+fi
+echo "splitvcfs: ${splitvcfs[*]}"
+
+# The BED branch reads a copy of the stream from a FIFO as a background job
+# that is waited for explicitly; the VCF branch is part of the pipeline.
+# Both outputs are therefore complete before bedtools and tabix read them
+# (bash does not wait for ">( ... )" process substitutions).
+workdir=$(mktemp -d)
+trap 'rm -rf "$workdir"' EXIT
+mkfifo "$workdir/bedfeed"
+/gvcf_regions/gvcf_regions.py --min_GQ "$gqcutoff" - < "$workdir/bedfeed" > "$sampleid".bed &
+bedpid=$!
+
+bcftools concat -n "${splitvcfs[@]}" | bcftools view --trim-alt-alleles | grep -Ev '\*|<NON_REF>' \
+  | tee "$workdir/bedfeed" \
+  | awk '{if ($5 != ".") print $0}' | bgzip -c > "$sampleid"_varonly.vcf.gz
+wait "$bedpid"
+
+bedtools subtract -a "$genomebed" -b "$sampleid".bed > "$sampleid"_nocall.bed
+rm "$sampleid".bed
+tabix "$sampleid"_varonly.vcf.gz
diff --git a/cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh b/cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh
new file mode 100755 (executable)
index 0000000..2ed5369
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Trim unused ALT alleles from a gVCF, drop lines containing "*" or
+# "<NON_REF>", and derive, in the current directory:
+#   <sampleid>_nocall.bed      <genomebed> minus the regions reported by gvcf_regions.py
+#   <sampleid>_varonly.vcf.gz  non-overlapping lines whose 5th column is not ".", plus a tabix index
+#
+# Usage: fixvcf-get_bed_varonlyvcf.sh <sampleid> <vcf> <gqcutoff> <genomebed>
+
+set -eo pipefail
+
+sampleid="$1"
+vcf="$2"
+gqcutoff="$3"
+genomebed="$4"
+
+# The BED branch reads a copy of the stream from a FIFO as a background job
+# that is waited for explicitly; the VCF branch is part of the pipeline.
+# Both outputs are therefore complete before bedtools and tabix read them
+# (bash does not wait for ">( ... )" process substitutions).
+workdir=$(mktemp -d)
+trap 'rm -rf "$workdir"' EXIT
+mkfifo "$workdir/bedfeed"
+/gvcf_regions/gvcf_regions.py --min_GQ "$gqcutoff" - < "$workdir/bedfeed" > "$sampleid".bed &
+bedpid=$!
+
+bcftools view --trim-alt-alleles "$vcf" | grep -Ev '\*|<NON_REF>' \
+  | tee "$workdir/bedfeed" \
+  | rtg vcffilter -i - -o - --remove-overlapping | awk '{if ($5 != ".") print $0}' | bgzip -c > "$sampleid"_varonly.vcf.gz
+wait "$bedpid"
+
+bedtools subtract -a "$genomebed" -b "$sampleid".bed > "$sampleid"_nocall.bed
+rm "$sampleid".bed
+tabix "$sampleid"_varonly.vcf.gz
diff --git a/cwl/gvcf2fasta/src/untar-concat.sh b/cwl/gvcf2fasta/src/untar-concat.sh
new file mode 100755 (executable)
index 0000000..e10d607
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Unpack a gzipped tarball of per-chromosome gVCFs and concatenate them, in
+# chromosome order, into <sampleid>.vcf.gz in the current directory.
+#
+# Usage: untar-concat.sh <sampleid> <vcftar>
+
+set -eo pipefail
+
+sampleid="$1"
+vcftar="$2"
+
+# Unpack into a private scratch directory under the current directory, so
+# that locating the extracted files does not depend on what else is here.
+# The scratch directory is removed when the script exits.
+extractdir=$(mktemp -d ./untar.XXXXXX)
+trap 'rm -rf "$extractdir"' EXIT
+tar -xzf "$vcftar" -C "$extractdir"
+
+chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM)
+
+# Files are expected one directory level below the archive root.  A
+# chromosome without a matching file is reported and skipped.
+shopt -s nullglob
+splitvcfs=()
+for chrom in "${chroms[@]}"; do
+  matches=("$extractdir"/*/*"$chrom"_*gz)
+  if [ "${#matches[@]}" -eq 0 ]; then
+    echo "warning: no split gVCF for $chrom in $vcftar" >&2
+  fi
+  splitvcfs+=("${matches[@]}")
+done
+if [ "${#splitvcfs[@]}" -eq 0 ]; then
+  echo "no split gVCFs found in $vcftar" >&2
+  exit 1
+fi
+echo "splitvcfs: ${splitvcfs[*]}"
+
+bcftools concat -n -O z -o "$sampleid".vcf.gz "${splitvcfs[@]}"
diff --git a/cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl
new file mode 100644 (file)
index 0000000..fb621e1
--- /dev/null
@@ -0,0 +1,77 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Untar, concatenate, and get no call BED and variant only VCF from gVCF
+requirements:
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement: {dockerPull: vcfutil}
+  ResourceRequirement: {ramMin: 5000, outdirMin: 40000}
+inputs:
+  sampleid: {type: string, label: "Sample ID"}
+  vcftar: {type: File, label: "Input gVCF tar"}
+  gqcutoff: {type: int, label: "GQ (Genotype Quality) cutoff for filtering"}
+  genomebed: {type: File, label: "Whole genome BED"}
+  bashscript:
+    type: File
+    label: "Script to untar and concatenate vcf tar ball"
+    default: {class: File, location: src/untar-concat.sh}
+outputs:
+  nocallbed:
+    type: File
+    label: "No call BED of gVCF"
+    outputBinding: {glob: "*_nocall.bed"}
+  varonlyvcf:
+    type: File
+    label: "Variant only VCF"
+    secondaryFiles: [.tbi]
+    outputBinding: {glob: "*_varonly.vcf.gz"}
+baseCommand: bash
+# The arguments spell out a single shell command line; the entries with
+# shellQuote: false are the operators that join its stages.
+arguments:
+  # bash <bashscript> <sampleid> <vcftar>
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.vcftar)
+  - {shellQuote: false, valueFrom: "&&"}
+  # gvcf_regions.py --min_GQ <gqcutoff> <sampleid>.vcf.gz > <sampleid>.bed
+  - /gvcf_regions/gvcf_regions.py
+  - {prefix: "--min_GQ", valueFrom: "$(inputs.gqcutoff)"}
+  - $(inputs.sampleid).vcf.gz
+  - {shellQuote: false, valueFrom: ">"}
+  - $(inputs.sampleid).bed
+  - {shellQuote: false, valueFrom: "&&"}
+  # bedtools subtract -a <genomebed> -b <sampleid>.bed > <sampleid>_nocall.bed
+  - bedtools
+  - subtract
+  - {prefix: "-a", valueFrom: "$(inputs.genomebed)"}
+  - {prefix: "-b", valueFrom: "$(inputs.sampleid).bed"}
+  - {shellQuote: false, valueFrom: ">"}
+  - $(inputs.sampleid)_nocall.bed
+  - {shellQuote: false, valueFrom: "&&"}
+  # bgzip -dc <sampleid>.vcf.gz | grep -v END= | bgzip -c > <sampleid>_varonly.vcf.gz
+  - bgzip
+  - "-dc"
+  - $(inputs.sampleid).vcf.gz
+  - {shellQuote: false, valueFrom: "|"}
+  - grep
+  - "-v"
+  - "END="
+  - {shellQuote: false, valueFrom: "|"}
+  - bgzip
+  - "-c"
+  - {shellQuote: false, valueFrom: ">"}
+  - $(inputs.sampleid)_varonly.vcf.gz
+  - {shellQuote: false, valueFrom: "&&"}
+  # tabix <sampleid>_varonly.vcf.gz
+  - tabix
+  - $(inputs.sampleid)_varonly.vcf.gz
+  - {shellQuote: false, valueFrom: "&&"}
+  # rm <sampleid>.vcf.gz <sampleid>.bed
+  - rm
+  - $(inputs.sampleid).vcf.gz
+  - $(inputs.sampleid).bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml
new file mode 100644 (file)
index 0000000..0be1363
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed"
+vcf:
+  class: File
+  location: keep:827ea468c00a16bf711bd215ea2ce2e6+175/A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.vcf.gz
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+haplotype: 1
+mask:
+  class: File
+  location: keep:17670aabfa66091b19c8c2fbfb35cbf9+145/A-IIAA-IA000196-BL-NCR-14AD66938_intersect.bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml
new file mode 100644 (file)
index 0000000..0656c7c
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC000711-BL-COL-39141BL1"
+vcf:
+  class: File
+  location: keep:97686398b32cd680c674e0e5174078e3+6243/A-WCAP-WC000711-BL-COL-39141BL1_varonly.vcf.gz
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+haplotype: 1
+mask:
+  class: File
+  location: keep:97686398b32cd680c674e0e5174078e3+6243/A-WCAP-WC000711-BL-COL-39141BL1_nocall.bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml
new file mode 100644 (file)
index 0000000..7d83717
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "HG00551.haplotypeCalls.er.raw"
+vcf:
+  class: File
+  location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_varonly.vcf.gz
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+haplotype: 1
+mask:
+  class: File
+  location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_nocall.bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test.yml
new file mode 100644 (file)
index 0000000..61ccca3
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "HG00551.haplotypeCalls.er.raw"
+vcf:
+  class: File
+  location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+haplotype: 1
+mask:
+  class: File
+  location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_nocall.bed
diff --git a/cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml
new file mode 100644 (file)
index 0000000..ea8f665
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC001544-BL-COL-49161BL1"
+splitvcfdir:
+  class: Directory
+  location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
diff --git a/cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml b/cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml
new file mode 100644 (file)
index 0000000..1137060
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "HG00551.haplotypeCalls.er.raw"
+vcf:
+  class: File
+  location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml
new file mode 100644 (file)
index 0000000..f289b3c
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC000711-BL-COL-39141BL1"
+vcf:
+  class: File
+  location: keep:3eead47d28a9cfb1fb1dc4fde1d38af0+4961919/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.1.allchr_g.vcf.gz
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml
new file mode 100644 (file)
index 0000000..0e9645e
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "HG00551.haplotypeCalls.er.raw"
+vcf:
+  class: File
+  location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml
new file mode 100644 (file)
index 0000000..50da851
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "LP6005441-DNA_A01"
+vcf:
+  class: File
+  location: keep:865a18acdb12f8d41b76a1bff76b2211+89722/LP6005441-DNA_A01.vcf.gz
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:1310f01a495c81d3c7d3154f1a73527f+63/human_g1k_v37.bed
+ref:
+  class: File
+  location: keep:5a42cfaddd3a9cfc4fac89b3fe73c6f6+751/human_g1k_v37.fasta.gz
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..d3ecbb1
--- /dev/null
@@ -0,0 +1,25 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+sampleid: "A-PRHS-PR000971-BL-COL-47620BL1"
+vcf:
+  class: File
+  location: keep:4cba97691d17f8542116c74ca6c1d89d+2045/A-PRHS-PR000971-BL-COL-47620BL1_vcpa1.1.allchr_g.vcf.gz
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..22aaa49
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+sampleid: A-PRHS-PR000971-BL-COL-47620BL1
+vcf:
+  class: File
+  location: keep:4cba97691d17f8542116c74ca6c1d89d+2045/A-PRHS-PR000971-BL-COL-47620BL1_vcpa1.1.allchr_g.vcf.gz
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml
new file mode 100644 (file)
index 0000000..0398aa4
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-IIAA-IA000196-BL-NCR-14AD66938"
+splitvcfdir:
+  class: Directory
+  location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
+panelcallbed:
+  class: File
+  location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml
new file mode 100644 (file)
index 0000000..4063dc5
--- /dev/null
@@ -0,0 +1,25 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC001544-BL-COL-49161BL1"
+splitvcfdir:
+  class: Directory
+  location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml
new file mode 100644 (file)
index 0000000..ef0db6a
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC001544-BL-COL-49161BL1"
+splitvcfdir:
+  class: Directory
+  location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml
new file mode 100644 (file)
index 0000000..df6431b
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC000711-BL-COL-39141BL1"
+vcftar:
+  class: File
+  location: keep:9b09803eb9ccd2b31d4db4ce80ed7a03+5003343/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.0_gatk_GRU-IRB-PUB.gvcf.tar.gz
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml
new file mode 100644 (file)
index 0000000..1c6cf14
--- /dev/null
@@ -0,0 +1,13 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:
+  class: Directory
+  location: keep:18966cf8fb85d5aa2b30f5773f02b93e+73402
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml
new file mode 100644 (file)
index 0000000..df78a85
--- /dev/null
@@ -0,0 +1,13 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:
+  class: Directory
+  location: keep:bde5e9a92ee9cf91fe95260c57386731+10722
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml
new file mode 100644 (file)
index 0000000..9ad600f
--- /dev/null
@@ -0,0 +1,326 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
+panelcallbed:
+  class: File
+  location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed
+sampleids:
+  - "A-IIAA-IA000604-BL-NCR-14AD67907"
+  - "A-IIAA-IA000369-BL-NCR-14AD67573"
+  - "A-IIAA-IA000196-BL-NCR-14AD66938"
+  - "A-IIAA-IA000194-BL-NCR-14AD66933"
+  - "A-IIAA-IA000009-BL-NCR-14AD67102"
+  - "A-WCAP-WC001736-BL-COL-62260BL1"
+  - "A-WCAP-WC001725-BL-COL-62376BL1"
+  - "A-WCAP-WC001708-BL-COL-50951BL1"
+  - "A-WCAP-WC001695-BL-COL-46967BL1"
+  - "A-WCAP-WC001710-BL-COL-40530BL1"
+  - "A-WCAP-WC001720-BL-COL-62328BL1"
+  - "A-WCAP-WC001704-BL-COL-48409BL1"
+  - "A-WCAP-WC001702-BL-COL-45946BL1"
+  - "A-WCAP-WC001697-BL-COL-46951BL1"
+  - "A-WCAP-WC001699-BL-COL-55120BL1"
+  - "A-WCAP-WC001700-BL-COL-57684BL1"
+  - "A-WCAP-WC001696-BL-COL-50506BL1"
+  - "A-WCAP-WC001693-BL-COL-48336BL1"
+  - "A-WCAP-WC001683-BL-COL-49188BL1"
+  - "A-WCAP-WC001687-BL-COL-48252BL1"
+  - "A-WCAP-WC001694-BL-COL-45207BL1"
+  - "A-WCAP-WC001686-BL-COL-50549BL1"
+  - "A-WCAP-WC001691-BL-COL-46031BL1"
+  - "A-WCAP-WC001688-BL-COL-50977BL1"
+  - "A-WCAP-WC001684-BL-COL-47006BL1"
+  - "A-WCAP-WC001681-BL-COL-57657BL1"
+  - "A-WCAP-WC001682-BL-COL-47560BL1"
+  - "A-WCAP-WC001679-BL-COL-49250BL1"
+  - "A-WCAP-WC001672-BL-COL-45187BL1"
+  - "A-WCAP-WC001673-BL-COL-48284BL1"
+  - "A-WCAP-WC001675-BL-COL-48314BL1"
+  - "A-WCAP-WC001677-BL-COL-46990BL1"
+  - "A-WCAP-WC001671-BL-COL-50527BL1"
+  - "A-WCAP-WC001669-BL-COL-57703BL1"
+  - "A-WCAP-WC001667-BL-COL-46970BL1"
+  - "A-WCAP-WC001670-BL-COL-50445BL1"
+  - "A-WCAP-WC001654-BL-COL-46029BL1"
+  - "A-WCAP-WC001664-BL-COL-39293BL1"
+  - "A-WCAP-WC001665-BL-COL-47583BL1"
+  - "A-WCAP-WC001653-BL-COL-48362BL1"
+  - "A-WCAP-WC001657-BL-COL-65820BL1"
+  - "A-WCAP-WC001658-BL-COL-50384BL1"
+  - "A-WCAP-WC001666-BL-COL-48390BL1"
+  - "A-WCAP-WC001656-BL-COL-57746BL1"
+  - "A-WCAP-WC001638-BL-COL-47008BL1"
+  - "A-WCAP-WC001639-BL-COL-41818BL1"
+  - "A-WCAP-WC001646-BL-COL-45962BL1"
+  - "A-WCAP-WC001652-BL-COL-47594BL1"
+  - "A-WCAP-WC001640-BL-COL-47537BL1"
+  - "A-WCAP-WC001629-BL-COL-69602BL1"
+  - "A-WCAP-WC001641-BL-COL-46986BL1"
+  - "A-WCAP-WC001645-BL-COL-41786BL1"
+  - "A-WCAP-WC001636-BL-COL-47553BL1"
+  - "A-WCAP-WC001634-BL-COL-50462BL1"
+  - "A-WCAP-WC001623-BL-COL-56498BL1"
+  - "A-WCAP-WC001627-BL-COL-56607BL1"
+  - "A-WCAP-WC001626-BL-COL-48292BL1"
+  - "A-WCAP-WC001621-BL-COL-48345BL1"
+  - "A-WCAP-WC001618-BL-COL-50400BL1"
+  - "A-WCAP-WC001622-BL-COL-50921BL1"
+  - "A-WCAP-WC001616-BL-COL-56626BL1"
+  - "A-WCAP-WC001617-BL-COL-45961BL1"
+  - "A-WCAP-WC001612-BL-COL-49158BL1"
+  - "A-WCAP-WC001608-BL-COL-48342BL1"
+  - "A-WCAP-WC001611-BL-COL-48295BL1"
+  - "A-WCAP-WC001605-BL-COL-45954BL1"
+  - "A-WCAP-WC001594-BL-COL-40429BL1"
+  - "A-WCAP-WC001606-BL-COL-48422BL1"
+  - "A-WCAP-WC001595-BL-COL-45200BL1"
+  - "A-WCAP-WC001602-BL-COL-45226BL1"
+  - "A-WCAP-WC001604-BL-COL-56480BL1"
+  - "A-WCAP-WC001598-BL-COL-49123BL1"
+  - "A-WCAP-WC001603-BL-COL-56489BL1"
+  - "A-WCAP-WC001587-BL-COL-45975BL1"
+  - "A-WCAP-WC001593-BL-COL-45249BL1"
+  - "A-WCAP-WC001589-BL-COL-55018BL1"
+  - "A-WCAP-WC001577-BL-COL-48318BL1"
+  - "A-WCAP-WC001586-BL-COL-45943BL1"
+  - "A-WCAP-WC001585-BL-COL-45991BL1"
+  - "A-WCAP-WC001584-BL-COL-45976BL1"
+  - "A-WCAP-WC001581-BL-COL-56483BL1"
+  - "A-WCAP-WC001582-BL-COL-47610BL1"
+  - "A-WCAP-WC001572-BL-COL-45937BL1"
+  - "A-WCAP-WC001574-BL-COL-56642BL1"
+  - "A-WCAP-WC001567-BL-COL-45235BL1"
+  - "A-WCAP-WC001573-BL-COL-46034BL1"
+  - "A-WCAP-WC001568-BL-COL-50455BL1"
+  - "A-WCAP-WC001566-BL-COL-56566BL1"
+  - "A-WCAP-WC001559-BL-COL-49283BL1"
+  - "A-WCAP-WC001556-BL-COL-45259BL1"
+  - "A-WCAP-WC001564-BL-COL-39158BL1"
+  - "A-WCAP-WC001552-BL-COL-55020BL1"
+  - "A-WCAP-WC001563-BL-COL-41119BL1"
+  - "A-WCAP-WC001558-BL-COL-48277BL1"
+  - "A-WCAP-WC001545-BL-COL-45981BL1"
+  - "A-WCAP-WC001547-BL-COL-57785BL1"
+  - "A-WCAP-WC001543-BL-COL-45240BL1"
+  - "A-WCAP-WC001542-BL-COL-40920BL1"
+  - "A-WCAP-WC001541-BL-COL-41762BL1"
+  - "A-WCAP-WC001544-BL-COL-49161BL1"
+splitvcfdirs:
+  - class: Directory
+    location: keep:47e5a217867e6a69efe10378541b38e7+7816
+  - class: Directory
+    location: keep:00d1dac7ab3769e1b600129643b3f7bc+8361
+  - class: Directory
+    location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128
+  - class: Directory
+    location: keep:24eb9ef3c45fae2569077d429f121bbd+8778
+  - class: Directory
+    location: keep:00a065a8a5e71acfd083172de3a86d4b+6930
+  - class: Directory
+    location: keep:c751a4f4778156180605ebc04bad6cc1+5894
+  - class: Directory
+    location: keep:7f94ff84914a9d0d873f5313e7124c55+5642
+  - class: Directory
+    location: keep:987b13f6ad4974b796cc2ec8f270d19d+6356
+  - class: Directory
+    location: keep:2284173a8cbcf3e950a41d385069d327+7622
+  - class: Directory
+    location: keep:db084c1516d23a4c6c746105d58a08f3+6356
+  - class: Directory
+    location: keep:a823e23a5f0822a981fa720a2bcb2287+6062
+  - class: Directory
+    location: keep:c7bd505077ecd399b59176f8d5bc34aa+5808
+  - class: Directory
+    location: keep:a590e7f19b8f19f1babdd8b7795e1c20+5681
+  - class: Directory
+    location: keep:1e1232a1a1df7b39605a4630464ef864+5517
+  - class: Directory
+    location: keep:f5c57ac585ab060ca4f9af439acd3e78+5682
+  - class: Directory
+    location: keep:714619e20a6cb2220fae47d5519d2b9f+5093
+  - class: Directory
+    location: keep:d572b35e03d4a2545e4c917506738917+5383
+  - class: Directory
+    location: keep:cca9ddebe18cdde474f9b9ceb33c0247+5768
+  - class: Directory
+    location: keep:55a3599a3b6adac75c19772f2fd0e080+5683
+  - class: Directory
+    location: keep:f4725fe4ae3032ff1f6852701aef182b+5176
+  - class: Directory
+    location: keep:a046387c19b4b4e92d3728cfb5c2239d+5468
+  - class: Directory
+    location: keep:ca6aabbfe01db391a27c755f413f7e24+5010
+  - class: Directory
+    location: keep:f880cb3c4fb2dee626a7afdce73f6b35+5051
+  - class: Directory
+    location: keep:213d15a3e1e2642b593449943d54f940+5851
+  - class: Directory
+    location: keep:e5df0c86fe692bdc234acdea89c09735+5512
+  - class: Directory
+    location: keep:3dcd72a55d24501eb4eeab04e735bc9e+6058
+  - class: Directory
+    location: keep:a56ad75332c6e504237d20f17006b306+5558
+  - class: Directory
+    location: keep:15fae2b96e4f4c28e9473755ade2beb0+5515
+  - class: Directory
+    location: keep:7be46744f59209dd25710bfa8bfb59d0+6527
+  - class: Directory
+    location: keep:bd0a1e4399598a231a8e78c475e94e22+5769
+  - class: Directory
+    location: keep:209ab994cae2c50c0f0f409cecfe0af4+5343
+  - class: Directory
+    location: keep:507185affe0707d6eb0269008717be79+4756
+  - class: Directory
+    location: keep:57af40c026feaf3da5ab7e095caeeae1+5725
+  - class: Directory
+    location: keep:3f515cba6b180bb7aa151ab05ce43270+5936
+  - class: Directory
+    location: keep:15350af160d548a437e45a1ca0432363+7786
+  - class: Directory
+    location: keep:dd76f2ee8fc0579b64e685d30e5d9922+5427
+  - class: Directory
+    location: keep:fde51804f15e0fea5a6bb7be37e1d262+6734
+  - class: Directory
+    location: keep:141fb2f192c4e1efdb6e373543022ab7+6568
+  - class: Directory
+    location: keep:2a96bea877d4a9cf25753c1298f34e58+6612
+  - class: Directory
+    location: keep:6edb216921b036a20cdd32583f2970b6+6022
+  - class: Directory
+    location: keep:f26422b6b05b8bb1e9f486e5c09051b2+5640
+  - class: Directory
+    location: keep:4ed2571eace3eb2963867ca835862646+6061
+  - class: Directory
+    location: keep:a5bf8756702a8f79723d3b134a8c6cff+5725
+  - class: Directory
+    location: keep:05fe61865950248bc6ed9f732426f42a+5385
+  - class: Directory
+    location: keep:e48587348ce4b238ad6594f3a862fca7+7832
+  - class: Directory
+    location: keep:802ba4f4f4a04e53f9e3120f5a871fd9+6902
+  - class: Directory
+    location: keep:22b66f6b397d2e051740d0b3b896c13c+5892
+  - class: Directory
+    location: keep:a6666076ab9bf6963e52d82206b17581+5429
+  - class: Directory
+    location: keep:ed99ae4b5448d5e998444cdc2d288c4f+5978
+  - class: Directory
+    location: keep:31e562eecd2259dc0404f83f138eb13a+6814
+  - class: Directory
+    location: keep:4b247a882829c85824ca49309e51f8b3+5470
+  - class: Directory
+    location: keep:4a3a45a029be557ceb627050b278404c+5097
+  - class: Directory
+    location: keep:96576082494eacc33d34891643247e16+5639
+  - class: Directory
+    location: keep:7849fd811c58ff9797956ca88885c072+5134
+  - class: Directory
+    location: keep:8281291d46712c4dee2929be01a8459d+5935
+  - class: Directory
+    location: keep:afdc5af01594e0e0372ab17287575db3+5427
+  - class: Directory
+    location: keep:aa783333788f5dd554055074ed4cb5ab+5384
+  - class: Directory
+    location: keep:21c8d76ef6ee9950cd2bb641b226a57b+5934
+  - class: Directory
+    location: keep:57a4338099666f13ab7cd05bad7c67c8+5892
+  - class: Directory
+    location: keep:04a1b83e91062b8c43eb3d470aaa6c64+5051
+  - class: Directory
+    location: keep:ad31d97aa3d355a666fe07da625f3994+6482
+  - class: Directory
+    location: keep:98acdba4fb52ac698eaad7449660227e+5517
+  - class: Directory
+    location: keep:03b7cd1daf28b6dcc913a45342f37c96+6482
+  - class: Directory
+    location: keep:60c42c1bad792d1d1ebc4c40420e8032+7030
+  - class: Directory
+    location: keep:3e7ef8e480273a67e223db2842d38e43+5513
+  - class: Directory
+    location: keep:b8ce59383ab761c76b35c91773409bf8+6692
+  - class: Directory
+    location: keep:2c7a882d3f13a0299baf866dc83872d7+7029
+  - class: Directory
+    location: keep:8740baf8f9730eff6d40a918a4c20f90+5384
+  - class: Directory
+    location: keep:8e63209016939215a48def1b350dee0a+6650
+  - class: Directory
+    location: keep:cee36503dcd257a70630396eab59e6c1+6481
+  - class: Directory
+    location: keep:d17a17d9fb4d05cbcadde06b99fae806+5430
+  - class: Directory
+    location: keep:c7f9b800e363290047d61904cc872c3e+5769
+  - class: Directory
+    location: keep:2dee32c1ab8b1fcc264458ae2609a18c+4887
+  - class: Directory
+    location: keep:9e18a67bc403b4d51ee4f556c597b689+5932
+  - class: Directory
+    location: keep:7bf3c8ef6a8ed7b4563569e1e4b85154+5051
+  - class: Directory
+    location: keep:cf3ca53fe3fe7955cd8993c9f2bdd24d+5682
+  - class: Directory
+    location: keep:8716d3eff15d14b0a072e481698fc715+6485
+  - class: Directory
+    location: keep:14b53d263217e13caf5755c66b8f9232+4884
+  - class: Directory
+    location: keep:df79fb7025f8706f20ed678e1916fd15+5424
+  - class: Directory
+    location: keep:e90c5745c169fc9f945463fcd065cdfd+4842
+  - class: Directory
+    location: keep:2e65619e3e557ae435b8b24cad86ce0e+6440
+  - class: Directory
+    location: keep:a67a94826b54eb78a4c3e582233579f8+6482
+  - class: Directory
+    location: keep:c184cd5457f7026ba8112492e3741036+6397
+  - class: Directory
+    location: keep:d5c5c607fd49300d94ecd7de39592eff+5177
+  - class: Directory
+    location: keep:3c7bf8b576bce2567590f90362ef0edf+5009
+  - class: Directory
+    location: keep:f83f53f0698ad52a1cb9b265a451eba8+5051
+  - class: Directory
+    location: keep:7680642fc1c8741d6657ca8b30675661+5013
+  - class: Directory
+    location: keep:55e6c7dc3edefb6625ee47ddfbe86f10+6648
+  - class: Directory
+    location: keep:0abaab02ff171c7a3d283ec54c845498+5515
+  - class: Directory
+    location: keep:dc949aef3a7959dc5259aa9e5caff0ac+6525
+  - class: Directory
+    location: keep:52f5abd360f99207bc7266f8f3b4e2ba+5512
+  - class: Directory
+    location: keep:3c0cb444429a6cf0be2ffc6e0359a345+6524
+  - class: Directory
+    location: keep:c6d33856d6620ed3b7dfcfaa9e4fa987+5343
+  - class: Directory
+    location: keep:84dc794f57a9fbf51f92a9add486702b+5341
+  - class: Directory
+    location: keep:f1b7173ab79d41035044f8ffa7ea5595+7956
+  - class: Directory
+    location: keep:7362951a09e4177e83af2ce779700ab8+6188
+  - class: Directory
+    location: keep:5f45b74d16fc04376ff3a16d30518ebb+5935
+  - class: Directory
+    location: keep:0a35ed7284d0851c7a2698026837c604+6900
+  - class: Directory
+    location: keep:0f3da67b2ad0df2886e7fe1e1c1b5338+6777
+  - class: Directory
+    location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893
diff --git a/cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml b/cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml
new file mode 100644 (file)
index 0000000..5d893fb
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-WCAP-WC000711-BL-COL-39141BL1"
+vcftar:
+  class: File
+  location: keep:9b09803eb9ccd2b31d4db4ce80ed7a03+5003343/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.0_gatk_GRU-IRB-PUB.gvcf.tar.gz
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
diff --git a/cwl/imputation/bcftools-concat.cwl b/cwl/imputation/bcftools-concat.cwl
new file mode 100644 (file)
index 0000000..8809250
--- /dev/null
@@ -0,0 +1,30 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Concatenate the input VCFs into <sample>_rawimputed.vcf.gz and index it with tabix.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement: {dockerPull: beagle5.4}
+  ResourceRequirement: {coresMin: 2, ramMin: 5000, tmpdirMin: 10000}
+inputs:
+  sample: string
+  vcfs: {type: "File[]", secondaryFiles: [.tbi]}
+outputs:
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+    outputBinding: {glob: "*vcf.gz"}
+baseCommand: [bcftools, concat]
+arguments:
+  # bcftools concat <vcfs...> -Oz -o <sample>_rawimputed.vcf.gz
+  - $(inputs.vcfs)
+  - "-Oz"
+  - {prefix: "-o", valueFrom: "$(inputs.sample)_rawimputed.vcf.gz"}
+  - {shellQuote: false, valueFrom: "&&"}
+  # tabix <sample>_rawimputed.vcf.gz
+  - tabix
+  - $(inputs.sample)_rawimputed.vcf.gz
diff --git a/cwl/imputation/beagle.cwl b/cwl/imputation/beagle.cwl
new file mode 100644 (file)
index 0000000..4f99f40
--- /dev/null
@@ -0,0 +1,50 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Subset the input VCF to region <chr>, run the Beagle jar on the subset, index the result and delete the subset.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement: {dockerPull: beagle5.4}
+  ResourceRequirement: {coresMin: 2, ramMin: 10000}
+inputs:
+  sample: string
+  chr: string
+  ref: File
+  map: File
+  vcf: {type: File, secondaryFiles: [.tbi]}
+outputs:
+  rawimputedvcf:
+    type: File
+    secondaryFiles: [.tbi]
+    outputBinding: {glob: "*.vcf.gz"}
+baseCommand: [bcftools, view]
+arguments:
+  # bcftools view <vcf> --regions <chr> -Oz -o <sample>_<chr>.vcf.gz
+  - $(inputs.vcf)
+  - {prefix: "--regions", valueFrom: "$(inputs.chr)"}
+  - "-Oz"
+  - {prefix: "-o", valueFrom: "$(inputs.sample)_$(inputs.chr).vcf.gz"}
+  - {shellQuote: false, valueFrom: "&&"}
+  # java -Xms<ram>m -jar <beagle jar> ref=<ref> map=<map> gt=<subset> out=<prefix> nthreads=<cores>
+  # NOTE(review): -Xms sets the initial JVM heap to the whole RAM request;
+  # confirm that -Xmx (maximum heap) was not what was intended.
+  - java
+  - -Xms$(runtime.ram)m
+  - {prefix: "-jar", valueFrom: "/beagle.05May22.33a.jar"}
+  - {prefix: "ref=", separate: false, valueFrom: "$(inputs.ref)"}
+  - {prefix: "map=", separate: false, valueFrom: "$(inputs.map)"}
+  - {prefix: "gt=", separate: false, valueFrom: "$(inputs.sample)_$(inputs.chr).vcf.gz"}
+  - {prefix: "out=", separate: false, valueFrom: "$(inputs.sample)_rawimputed_$(inputs.chr)"}
+  - {prefix: "nthreads=", separate: false, valueFrom: "$(runtime.cores)"}
+  - {shellQuote: false, valueFrom: "&&"}
+  # tabix <sample>_rawimputed_<chr>.vcf.gz
+  - tabix
+  - $(inputs.sample)_rawimputed_$(inputs.chr).vcf.gz
+  - {shellQuote: false, valueFrom: "&&"}
+  # rm <sample>_<chr>.vcf.gz
+  - rm
+  - $(inputs.sample)_$(inputs.chr).vcf.gz
diff --git a/cwl/imputation/bedtools-intersect.cwl b/cwl/imputation/bedtools-intersect.cwl
new file mode 100644 (file)
index 0000000..a3d230c
--- /dev/null
@@ -0,0 +1,21 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Run `bedtools intersect -a <a> -b <b>` and capture its stdout as <sample>_intersect.bed.
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement: {dockerPull: vcfutil}
+  ResourceRequirement: {ramMin: 5000}
+inputs:
+  sample: string
+  a: File
+  b: File
+baseCommand: [bedtools, intersect]
+arguments:
+  - {prefix: "-a", valueFrom: "$(inputs.a)"}
+  - {prefix: "-b", valueFrom: "$(inputs.b)"}
+stdout: $(inputs.sample)_intersect.bed
+outputs:
+  intersectbed: stdout
diff --git a/cwl/imputation/bedtools-subtract.cwl b/cwl/imputation/bedtools-subtract.cwl
new file mode 100644 (file)
index 0000000..e8feb4f
--- /dev/null
@@ -0,0 +1,24 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Runs "bedtools subtract -a <a> -b <b>"; stdout is captured as <sample>_subtract.bed.
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000  # mebibytes, per the CWL ResourceRequirement definition
+inputs:
+  sample: string  # only used to build the output file name
+  a: File  # passed to bedtools as -a
+  b: File  # passed to bedtools as -b
+outputs:
+  subtractbed: stdout  # the file named by the stdout field at the bottom
+baseCommand: [bedtools, subtract]
+arguments:
+  - prefix: "-a"
+    valueFrom: $(inputs.a)
+  - prefix: "-b"
+    valueFrom: $(inputs.b)
+stdout: $(inputs.sample)_subtract.bed
diff --git a/cwl/imputation/get-imputedvcf.cwl b/cwl/imputation/get-imputedvcf.cwl
new file mode 100644 (file)
index 0000000..fec7bea
--- /dev/null
@@ -0,0 +1,45 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Pipeline: zcat <vcf> | egrep "^#|IMP" | egrep -v '0\|0' | bgzip -c > <sample>.vcf.gz && tabix <sample>.vcf.gz
+requirements:
+  ShellCommandRequirement: {}  # required for the unquoted |, > and && arguments below
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # only used to build the output file name
+  vcf: File  # read with zcat, so it is expected to be gzip/bgzip compressed
+outputs:
+  imputedvcf:
+    type: File
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # index written by the trailing tabix call
+baseCommand: zcat
+arguments:
+  - $(inputs.vcf)
+  - shellQuote: false
+    valueFrom: "|"
+  - "egrep"
+  - "^#|IMP"  # keep header lines and any line containing IMP (presumably Beagle's imputed-marker flag; confirm)
+  - shellQuote: false
+    valueFrom: "|"
+  - "egrep"
+  - prefix: "-v"
+    valueFrom: '0\|0'  # drop every line containing the literal text 0|0
+  - shellQuote: false
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  - $(inputs.sample).vcf.gz
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - $(inputs.sample).vcf.gz
diff --git a/cwl/imputation/get-phasedvcf.cwl b/cwl/imputation/get-phasedvcf.cwl
new file mode 100644 (file)
index 0000000..f78b381
--- /dev/null
@@ -0,0 +1,41 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Pipeline: zcat <vcf> | egrep -v '0\|0|IMP' | bgzip -c > <sample>.vcf.gz && tabix <sample>.vcf.gz
+requirements:
+  ShellCommandRequirement: {}  # required for the unquoted |, > and && arguments below
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # only used to build the output file name
+  vcf: File  # read with zcat, so it is expected to be gzip/bgzip compressed
+outputs:
+  phasedvcf:
+    type: File
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # index written by the trailing tabix call
+baseCommand: zcat
+arguments:
+  - $(inputs.vcf)
+  - shellQuote: false
+    valueFrom: "|"
+  - "egrep"
+  - prefix: "-v"
+    valueFrom: '0\|0|IMP'  # drop lines containing the literal 0|0 or IMP; this applies to header lines too
+  - shellQuote: false
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  - $(inputs.sample).vcf.gz
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - $(inputs.sample).vcf.gz
diff --git a/cwl/imputation/imputation-wf.cwl b/cwl/imputation/imputation-wf.cwl
new file mode 100644 (file)
index 0000000..6a6782d
--- /dev/null
@@ -0,0 +1,58 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow  # Filters vcf against nocallbed, runs scatter-beagle-wf.cwl over chrs, then merges via merge-phased-imputed-wf.cwl.
+requirements:
+  SubworkflowFeatureRequirement: {}  # two of the steps below run other workflows
+inputs:
+  sample: string
+  chrs:
+    type: string[]
+  refsdir: Directory  # forwarded to scatter-beagle-wf
+  mapsdir: Directory  # forwarded to scatter-beagle-wf
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+  nocallbed: File  # used both as the exclude-bed of the first step and as an input of the merge step
+  panelnocallbed: File
+  panelcallbed: File
+  genomebed: File
+
+outputs:
+  phasedimputedvcf:
+    type: File
+    outputSource: merge-phased-imputed-wf/phasedimputedvcf
+  phasedimputednocallbed:
+    type: File
+    outputSource: merge-phased-imputed-wf/phasedimputednocallbed
+
+steps:
+  rtg-vcffilter:
+    run: rtg-vcffilter.cwl
+    in:
+      sample: sample
+      vcf: vcf
+      excludebed: nocallbed
+    out: [filteredvcf]
+  scatter-beagle-wf:
+    run: scatter-beagle-wf.cwl
+    in:
+      sample: sample
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      vcf: rtg-vcffilter/filteredvcf
+    out: [rawimputedvcf]
+  merge-phased-imputed-wf:
+    run: merge-phased-imputed-wf.cwl
+    in:
+      sample: sample
+      vcf: rtg-vcffilter/filteredvcf  # the filtered VCF, not the raw workflow input
+      nocallbed: nocallbed
+      rawimputedvcf: scatter-beagle-wf/rawimputedvcf
+      panelnocallbed: panelnocallbed
+      panelcallbed: panelcallbed
+      genomebed: genomebed
+    out: [phasedimputedvcf, phasedimputednocallbed]
diff --git a/cwl/imputation/match-ref-map-chr.cwl b/cwl/imputation/match-ref-map-chr.cwl
new file mode 100644 (file)
index 0000000..2df9bc7
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: ExpressionTool
+# For each chromosome in chrs, pick the matching .bref3 file from refsdir and .map file from mapsdir.
+requirements:
+  InlineJavascriptRequirement: {}
+  # The expression reads <dir>.listing, so the listing must be loaded rather than merely hinted.
+  LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  chrs: string[]
+  refsdir: Directory
+  mapsdir: Directory
+outputs:
+  refs:
+    type: File[]
+  maps:
+    type: File[]
+expression: |
+  ${
+    // Return the one entry of listing with extension ext whose basename contains chr + ".".
+    // Fails loudly on zero or several matches so refs/maps stay index-aligned with chrs.
+    function pick(listing, ext, chr) {
+      var hits = listing.filter(function(file) {
+        return file.nameext == ext && file.basename.indexOf(chr+".") != -1;
+      });
+      if (hits.length != 1) {
+        throw new Error("expected exactly one " + ext + " file matching " + chr + ", found " + hits.length);
+      }
+      return hits[0];
+    }
+    var refs = [];
+    var maps = [];
+    for (var i = 0; i < inputs.chrs.length; i++) {
+      refs.push(pick(inputs.refsdir.listing, ".bref3", inputs.chrs[i]));
+      maps.push(pick(inputs.mapsdir.listing, ".map", inputs.chrs[i]));
+    }
+    return {"refs": refs, "maps": maps};
+  }
diff --git a/cwl/imputation/merge-phased-imputed-wf.cwl b/cwl/imputation/merge-phased-imputed-wf.cwl
new file mode 100644 (file)
index 0000000..5b75f21
--- /dev/null
@@ -0,0 +1,69 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow  # Splits rawimputedvcf into phased and imputed parts, restricts the imputed part by BED, and merges both with vcf.
+inputs:
+  sample: string
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+  nocallbed: File
+  rawimputedvcf:
+    type: File
+    secondaryFiles: [.tbi]
+  panelnocallbed: File
+  panelcallbed: File
+  genomebed: File  # NOTE(review): not referenced by any step in this workflow
+
+outputs:
+  phasedimputedvcf:
+    type: File
+    outputSource: rtg-vcfmerge/phasedimputedvcf
+  phasedimputednocallbed:
+    type: File
+    outputSource: bedtools-intersect_phasedimputednocallbed/intersectbed
+
+steps:
+  get-phasedvcf:
+    run: get-phasedvcf.cwl
+    in:
+      sample: sample
+      vcf: rawimputedvcf
+    out: [phasedvcf]
+  get-imputedvcf:
+    run: get-imputedvcf.cwl
+    in:
+      sample: sample
+      vcf: rawimputedvcf
+    out: [imputedvcf]
+  bedtools-intersect_phasedimputednocallbed:  # nocallbed intersected with panelnocallbed; exposed as a workflow output
+    run: bedtools-intersect.cwl
+    in:
+      sample: sample
+      a: nocallbed
+      b: panelnocallbed
+    out: [intersectbed]
+  bedtools-intersect_imputationbed:  # nocallbed intersected with panelcallbed; used to restrict the imputed VCF
+    run: bedtools-intersect.cwl
+    in:
+      sample: sample
+      a: nocallbed
+      b: panelcallbed
+    out: [intersectbed]
+  rtg-vcffilter-bedtools-intersect:
+    run: rtg-vcffilter-bedtools-intersect.cwl
+    in:
+      sample: sample
+      vcf: get-imputedvcf/imputedvcf
+      bed: bedtools-intersect_imputationbed/intersectbed
+    out: [filteredvcf]
+  rtg-vcfmerge:
+    run: rtg-vcfmerge.cwl
+    in:
+      sample: sample
+      vcf: vcf
+      phasedvcf: get-phasedvcf/phasedvcf
+      imputedvcf: rtg-vcffilter-bedtools-intersect/filteredvcf
+    out: [phasedimputedvcf]
diff --git a/cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl b/cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl
new file mode 100644 (file)
index 0000000..dfacd09
--- /dev/null
@@ -0,0 +1,54 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Pipeline: rtg vcffilter --remove-overlapping | bedtools intersect -header -f 1 | bgzip > <sample>.vcf.gz && tabix
+requirements:
+  ShellCommandRequirement: {}  # required for the unquoted |, > and && arguments below
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # only used to build the output file name
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+  bed: File  # passed to bedtools intersect as -b
+outputs:
+  filteredvcf:
+    type: File
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # index written by the trailing tabix call
+baseCommand: [rtg, vcffilter]
+arguments:
+  - "--remove-overlapping"
+  - prefix: "-i"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-o"
+    valueFrom: "-"  # write to stdout so the result can be piped into bedtools
+  - shellQuote: false
+    valueFrom: "|"
+  - "bedtools"
+  - "intersect"
+  - "-header"
+  - prefix: "-f"
+    valueFrom: "1"
+  - prefix: "-a"
+    valueFrom: "stdin"  # read the piped rtg output
+  - prefix: "-b"
+    valueFrom: $(inputs.bed)
+  - shellQuote: false
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  - $(inputs.sample).vcf.gz
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - $(inputs.sample).vcf.gz
diff --git a/cwl/imputation/rtg-vcffilter.cwl b/cwl/imputation/rtg-vcffilter.cwl
new file mode 100644 (file)
index 0000000..e59a0c2
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Runs "rtg vcffilter -i <vcf> -o <sample>.vcf.gz --exclude-bed <excludebed>".
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # only used to build the output file name
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+  excludebed: File
+outputs:
+  filteredvcf:
+    type: File
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # no tabix call here; assumes rtg writes the index itself (TODO confirm)
+baseCommand: [rtg, vcffilter]
+arguments:
+  - prefix: "-i"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-o"
+    valueFrom: $(inputs.sample).vcf.gz
+  - prefix: "--exclude-bed"
+    valueFrom: $(inputs.excludebed)
diff --git a/cwl/imputation/rtg-vcfmerge.cwl b/cwl/imputation/rtg-vcfmerge.cwl
new file mode 100644 (file)
index 0000000..ef95d1f
--- /dev/null
@@ -0,0 +1,36 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool  # Runs "rtg vcfmerge --force-merge-all <phasedvcf> <vcf> <imputedvcf> -o <sample>_phased_imputed.vcf.gz".
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # only used to build the output file name
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+  phasedvcf:
+    type: File
+    secondaryFiles: [.tbi]
+  imputedvcf:
+    type: File
+    secondaryFiles: [.tbi]
+outputs:
+  phasedimputedvcf:
+    type: File
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # no tabix call here; assumes rtg writes the index itself (TODO confirm)
+baseCommand: [rtg, vcfmerge]
+arguments:
+  - "--force-merge-all"
+  - $(inputs.phasedvcf)  # the three inputs are given in this order: phased, original, imputed
+  - $(inputs.vcf)
+  - $(inputs.imputedvcf)
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_phased_imputed.vcf.gz
diff --git a/cwl/imputation/scatter-beagle-wf.cwl b/cwl/imputation/scatter-beagle-wf.cwl
new file mode 100644 (file)
index 0000000..1f80c00
--- /dev/null
@@ -0,0 +1,48 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow  # Looks up one ref and one map file per chromosome, runs beagle.cwl per chromosome, then concatenates the results.
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  sample: string
+  chrs:
+    type: string[]
+  refsdir: Directory
+  mapsdir: Directory
+  vcf:
+    type: File
+    secondaryFiles: [.tbi]
+
+outputs:
+  rawimputedvcf:
+    type: File
+    outputSource: bcftools-concat/vcf
+
+steps:
+  match-ref-map-chr:
+    run: match-ref-map-chr.cwl
+    in:
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+    out: [refs, maps]
+  beagle:
+    scatter: [chr, ref, map]
+    scatterMethod: dotproduct  # pairs chrs[i] with refs[i] and maps[i], so the three arrays must have equal length
+    run: beagle.cwl
+    in:
+      sample: sample
+      chr: chrs
+      ref: match-ref-map-chr/refs
+      map: match-ref-map-chr/maps
+      vcf: vcf  # not scattered: every job receives the same VCF
+    out: [rawimputedvcf]
+  bcftools-concat:
+    run: bcftools-concat.cwl
+    in:
+      sample: sample
+      vcfs: beagle/rawimputedvcf  # array with one VCF per scattered beagle job
+    out: [vcf]
diff --git a/cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..f6562ee
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+sample: "A-PRHS-PR000971-BL-COL-47620BL1"  # the keys in this job file match the inputs declared by imputation-wf.cwl
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+vcf:
+  class: File
+  location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz
+nocallbed:
+  class: File
+  location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
+panelcallbed:
+  class: File
+  location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
diff --git a/cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..f7c5199
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sample: "A-PRHS-PR000971-BL-COL-47620BL1"  # the keys in this job file match the inputs declared by rtg-vcffilter.cwl
+vcf:
+  class: File
+  location: keep:d2db452933c90d6d116107cc687660fb+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz
+excludebed:
+  class: File
+  location: keep:d2db452933c90d6d116107cc687660fb+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed
diff --git a/cwl/imputation/yml/rtg-vcffilter-test.yml b/cwl/imputation/yml/rtg-vcffilter-test.yml
new file mode 100644 (file)
index 0000000..ced2ea6
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sample: "A-PRHS-PR000971-BL-COL-47620BL1"
+vcf:
+  class: File
+  location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz
+includebed:  # NOTE(review): rtg-vcffilter.cwl in this directory declares no includebed input; confirm which tool this file targets
+  class: File
+  location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed
+excludebed:  # points at the same file as includebed above
+  class: File
+  location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed
+
diff --git a/cwl/lightning/batch-dirs.cwl b/cwl/lightning/batch-dirs.cwl
new file mode 100644 (file)
index 0000000..d1caa68
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: ExpressionTool
+# Split dirs into consecutive batches of at most batchsize directories, preserving order.
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dirs: Directory[]
+  batchsize: int
+outputs:
+  batches:
+    type:
+      type: array
+      items:
+        type: array
+        items: Directory
+expression: |
+  ${
+    // A batchsize below 1 would never advance the loop index, so reject it up front.
+    if (inputs.batchsize < 1) {
+      throw new Error("batchsize must be at least 1, got " + inputs.batchsize);
+    }
+    var batches = [];
+    for (var i = 0; i < inputs.dirs.length; i += inputs.batchsize) {
+      batches.push(inputs.dirs.slice(i, i + inputs.batchsize));
+    }
+    return {"batches": batches};
+  }
diff --git a/cwl/lightning/fasta2numpy-multi-wf.cwl b/cwl/lightning/fasta2numpy-multi-wf.cwl
new file mode 100644 (file)
index 0000000..f5ce6a9
--- /dev/null
@@ -0,0 +1,138 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.2
+class: Workflow  # Imports FASTA batches and references, slices once per reference library, then scatters lightning-slice-numpy-anno2vcf-wf.cwl.
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+  StepInputExpressionRequirement: {}  # needed for the valueFrom step inputs below
+#hints:
+#  arv:IntermediateOutput:
+#    outputTTL: 604800
+
+inputs:
+  tagset:
+    type: File
+  fastadirs:
+    type:
+      type: array
+      items: Directory
+  refdirs:
+    type:
+      type: array
+      items: Directory
+  batchsize:
+    type: int
+  matchgenome_array:
+    type: string[]
+  regions_nestedarray:
+    type:
+      type: array
+      items:
+        type: array
+        items: [File, "null"]  # individual entries may be null
+  threads_array:
+    type: int[]
+  mergeoutput_array:
+    type: string[]
+  expandregions_array:
+    type: int[]
+  phenotypesdir:
+    type: Directory
+  chrs: string[]
+  snpeffdatadir: Directory
+  genomeversion_array: string[]
+  dbsnp:
+    type: File
+    secondaryFiles: [.csi]
+  gnomaddir: Directory
+  readmeinfo: string[]
+
+outputs:
+  stagednpydir:
+    type: Directory[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagednpydir
+  stagedonehotnpydir:
+    type: Directory[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedonehotnpydir
+  stagedannotationdir:
+    type:
+      type: array
+      items: [Directory, "null"]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedannotationdir
+  readme:
+    type: File[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/readme
+
+steps:
+  batch-dirs:
+    run: batch-dirs.cwl
+    in:
+      dirs: fastadirs
+      batchsize: batchsize
+    out: [batches]
+
+  lightning-import_data:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per batch produced by batch-dirs
+    in:
+      saveincomplete:
+        valueFrom: "false"
+      tagset: tagset
+      fastadirs: batch-dirs/batches
+    out: [lib]
+
+  lightning-import_refs:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per entry of refdirs
+    in:
+      saveincomplete:
+        valueFrom: "true"
+      tagset: tagset
+      fastadirs: refdirs
+    out: [lib]
+
+  lightning-slice:
+    run: lightning-slice.cwl
+    scatter: reflib  # one slice job per reference lib; datalibs is not scattered, so each job gets all data libs
+    in:
+      datalibs: lightning-import_data/lib
+      reflib: lightning-import_refs/lib
+    out: [libdir]
+
+  make-arrays:
+    run: make-arrays.cwl
+    in:
+      matchgenome_array: matchgenome_array
+      libdir_array: lightning-slice/libdir
+      genomeversion_array: genomeversion_array
+      regions_nestedarray: regions_nestedarray
+      threads_array: threads_array
+      mergeoutput_array: mergeoutput_array
+      expandregions_array: expandregions_array
+    out: [full_matchgenome_array, full_libdir_array, full_genomeversion_array, full_regions_array, full_threads_array, full_mergeoutput_array, full_expandregions_array, full_libname_array]
+
+  lightning-slice-numpy-anno2vcf-wf:
+    run: lightning-slice-numpy-anno2vcf-wf.cwl
+    scatter: [matchgenome, libdir, genomeversion, regions, threads, mergeoutput, expandregions, libname]
+    scatterMethod: dotproduct  # the eight scattered arrays are consumed index by index and must have equal length
+    in:
+      matchgenome: make-arrays/full_matchgenome_array
+      libdir: make-arrays/full_libdir_array
+      regions: make-arrays/full_regions_array
+      threads: make-arrays/full_threads_array
+      mergeoutput: make-arrays/full_mergeoutput_array
+      expandregions: make-arrays/full_expandregions_array
+      phenotypesdir: phenotypesdir
+      libname: make-arrays/full_libname_array
+      chrs: chrs
+      snpeffdatadir: snpeffdatadir
+      genomeversion: make-arrays/full_genomeversion_array
+      dbsnp: dbsnp
+      gnomaddir: gnomaddir
+      readmeinfo: readmeinfo
+    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir, readme]
diff --git a/cwl/lightning/fasta2numpy-wf.cwl b/cwl/lightning/fasta2numpy-wf.cwl
new file mode 100644 (file)
index 0000000..a804b9c
--- /dev/null
@@ -0,0 +1,226 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.2
+class: Workflow  # Imports FASTA batches and one reference, slices, builds numpy/onehot/PCA outputs, annotates the onehot VCFs, and stages the results.
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+  StepInputExpressionRequirement: {}  # needed for the valueFrom step inputs below
+  MultipleInputFeatureRequirement: {}  # needed for the multi-source inputs of stage-output
+
+inputs:
+  tagset:
+    type: File
+  fastadirs:
+    type:
+      type: array
+      items: Directory
+  refdir:
+    type: Directory
+  batchsize:
+    type: int
+  regions:
+    type: File?
+  matchgenome:
+    type: string
+  threads:
+    type: int
+  mergeoutput:
+    type: string
+  expandregions:
+    type: int
+  phenotypesnofamilydir:  # consumed by lightning-choose-samples
+    type: Directory
+  phenotypesdir:  # consumed by the two lightning-plot steps
+    type: Directory
+  trainingsetsize:
+    type: float
+  randomseed:
+    type: int
+  pcacomponents:
+    type: int
+  chrs: string[]
+  snpeffdatadir: Directory
+  genomeversion: string
+  dbsnp:
+    type: File
+    secondaryFiles: [.csi]
+  gnomaddir: Directory
+  readmeinfo: string[]
+
+outputs:
+  stagednpydir:
+    type: Directory
+    outputSource: stage-output/stagednpydir
+  stagedonehotnpydir:
+    type: Directory
+    outputSource: stage-output/stagedonehotnpydir
+  stagedannotationdir:
+    type: Directory
+    outputSource: stage-output/stagedannotationdir
+  readme:
+    type: File
+    outputSource: genreadme/readme
+
+steps:
+  batch-dirs:
+    run: batch-dirs.cwl
+    in:
+      dirs: fastadirs
+      batchsize: batchsize
+    out: [batches]
+
+  lightning-import_data:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per batch produced by batch-dirs
+    in:
+      saveincomplete:
+        valueFrom: "false"
+      tagset: tagset
+      fastadirs: batch-dirs/batches
+    out: [lib]
+
+  lightning-import_refs:
+    run: lightning-import.cwl
+    in:
+      saveincomplete:
+        valueFrom: "true"
+      tagset: tagset
+      fastadirs: refdir  # a single Directory here, unlike the scattered data import above
+    out: [lib]
+
+  lightning-slice:
+    run: lightning-slice.cwl
+    in:
+      datalibs: lightning-import_data/lib
+      reflib: lightning-import_refs/lib
+    out: [libdir]
+
+  lightning-tiling-stats:
+    run: lightning-tiling-stats.cwl
+    in:
+      libdir: lightning-slice/libdir
+    out: [bed]
+
+  lightning-choose-samples:
+    run: lightning-choose-samples.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      phenotypesdir: phenotypesnofamilydir
+      trainingsetsize: trainingsetsize
+      randomseed: randomseed
+    out: [samplescsv]
+
+  lightning-slice-numpy:
+    run: lightning-slice-numpy.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      samplescsv: lightning-choose-samples/samplescsv
+    out: [outdir, npys, chunktagoffsetcsv]
+
+  lightning-slice-numpy-onehot:
+    run: lightning-slice-numpy-onehot.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      samplescsv: lightning-choose-samples/samplescsv
+    out: [outdir, npys]
+
+  lightning-slice-numpy-pca:
+    run: lightning-slice-numpy-pca.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      samplescsv: lightning-choose-samples/samplescsv
+      pcacomponents: pcacomponents
+    out: [outdir, pcanpy, pcasamplescsv]
+
+  lightning-plot_1-2:  # plot with xcomponent 1 and ycomponent 2
+    run: lightning-plot.cwl
+    in:
+      pcanpy: lightning-slice-numpy-pca/pcanpy
+      pcasamplescsv: lightning-slice-numpy-pca/pcasamplescsv
+      phenotypesdir: phenotypesdir
+      xcomponent:
+        valueFrom: "1"
+      ycomponent:
+        valueFrom: "2"
+    out: [png]
+
+  lightning-plot_2-3:  # plot with xcomponent 2 and ycomponent 3
+    run: lightning-plot.cwl
+    in:
+      pcanpy: lightning-slice-numpy-pca/pcanpy
+      pcasamplescsv: lightning-slice-numpy-pca/pcasamplescsv
+      phenotypesdir: phenotypesdir
+      xcomponent:
+        valueFrom: "2"
+      ycomponent:
+        valueFrom: "3"
+    out: [png]
+
+  lightning-anno2vcf-onehot:
+    run: lightning-anno2vcf.cwl
+    in:
+      annodir: lightning-slice-numpy-onehot/outdir
+    out: [vcfdir]
+
+  make-libname:
+    run: make-libname.cwl
+    in:
+      matchgenome: matchgenome
+      genomeversion: genomeversion
+    out: [libname]
+
+  annotate-wf:
+    run: ../annotation/annotate-wf.cwl
+    in:
+      sample: make-libname/libname  # the generated library name is passed as the sample input
+      chrs: chrs
+      vcfdir: lightning-anno2vcf-onehot/vcfdir
+      snpeffdatadir: snpeffdatadir
+      genomeversion: genomeversion
+      dbsnp: dbsnp
+      gnomaddir: gnomaddir
+    out: [annotatedvcf, summary]
+
+  stage-output:
+    run: stage-output.cwl
+    in:
+      libname: make-libname/libname
+      npyfiles:
+        source: [lightning-slice-numpy/npys, lightning-slice-numpy/chunktagoffsetcsv]
+        linkMerge: merge_flattened  # combine both sources into one flat list
+      onehotnpyfiles: lightning-slice-numpy-onehot/npys
+      pcapngs:
+        source: [lightning-plot_1-2/png, lightning-plot_2-3/png]
+        linkMerge: merge_flattened
+      bed: lightning-tiling-stats/bed
+      annotatedvcf: annotate-wf/annotatedvcf
+      summary: annotate-wf/summary
+    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir]
+
+  genreadme:
+    run: genreadme.cwl
+    in:
+      samplescsv: lightning-choose-samples/samplescsv
+      readmeinfo: readmeinfo
+    out: [readme]
diff --git a/cwl/lightning/genreadme.cwl b/cwl/lightning/genreadme.cwl
new file mode 100644 (file)
index 0000000..ec25d41
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: CommandLineTool  # Invokes the script with samplescsv and readmeinfo as arguments; stdout is captured as README.
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 2000
+inputs:
+  samplescsv: File
+  readmeinfo: string[]
+  pythonscript:
+    type: File
+    default:
+      class: File
+      location: src/genreadme.py  # relative location, resolved against this CWL file
+outputs:
+  readme:
+    type: stdout  # the file named by the stdout field at the bottom
+arguments:
+  - $(inputs.pythonscript)  # no baseCommand, so this path is the program executed. NOTE(review): needs an executable script; confirm
+  - $(inputs.samplescsv)
+  - $(inputs.readmeinfo)
+stdout: README
diff --git a/cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl b/cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl
new file mode 100644 (file)
index 0000000..8e12798
--- /dev/null
@@ -0,0 +1,119 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.2
+class: Workflow  # Chains the gvcf2fasta/imputation subworkflow into lightning import, slice, tiling-stats and slice-numpy.
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+  StepInputExpressionRequirement: {}  # needed for the valueFrom step inputs below
+
+inputs:
+  sampleids:
+    type: string[]
+  splitvcfdirs:
+    type: Directory[]
+  gqcutoff:
+    type: int
+  genomebed:
+    type: File
+  ref:
+    type: File
+  chrs: string[]
+  refsdir: Directory
+  mapsdir: Directory
+  panelnocallbed: File
+  panelcallbed: File
+  tagset:
+    type: File
+  refdir:
+    type: Directory
+  batchsize:
+    type: int
+  regions:
+    type: File?
+  matchgenome:
+    type: string
+  threads:
+    type: int
+  mergeoutput:
+    type: string
+  expandregions:
+    type: int
+
+outputs: []  # NOTE(review): no step output is exposed as a workflow output; confirm this is intended
+
+steps:
+  scatter-gvcf2fasta_splitvcf-imputation-wf:
+    run: ../gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl
+    in:
+      sampleids: sampleids
+      splitvcfdirs: splitvcfdirs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      panelnocallbed: panelnocallbed
+      panelcallbed: panelcallbed
+    out: [fas]
+
+  make-fastadirs:
+    run: make-fastadirs.cwl
+    in:
+      fas: scatter-gvcf2fasta_splitvcf-imputation-wf/fas
+    out: [fastadirs]
+
+  batch-dirs:
+    run: batch-dirs.cwl
+    in:
+      dirs: make-fastadirs/fastadirs
+      batchsize: batchsize
+    out: [batches]
+
+  lightning-import_data:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per batch produced by batch-dirs
+    in:
+      saveincomplete:
+        valueFrom: "false"
+      tagset: tagset
+      fastadirs: batch-dirs/batches
+    out: [lib]
+
+  lightning-import_refs:
+    run: lightning-import.cwl
+    in:
+      saveincomplete:
+        valueFrom: "true"
+      tagset: tagset
+      fastadirs: refdir
+    out: [lib]
+
+  lightning-slice:
+    run: lightning-slice.cwl
+    in:
+      datalibs: lightning-import_data/lib
+      reflib: lightning-import_refs/lib
+    out: [libdir]
+
+  lightning-tiling-stats:
+    run: lightning-tiling-stats.cwl
+    in:
+      libdir: lightning-slice/libdir
+    out: [bed]
+
+  lightning-slice-numpy:
+    run: lightning-slice-numpy.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+    out: [outdir, npys, chunktagoffsetcsv]
diff --git a/cwl/lightning/libray2numpy-wf.cwl b/cwl/lightning/libray2numpy-wf.cwl
new file mode 100644 (file)
index 0000000..f5ce6a9
--- /dev/null
@@ -0,0 +1,138 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.2
+class: Workflow  # Same blob (index f5ce6a9) as fasta2numpy-multi-wf.cwl in this commit. NOTE(review): "libray" in the file name may be a typo
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+  StepInputExpressionRequirement: {}  # needed for the valueFrom step inputs below
+#hints:
+#  arv:IntermediateOutput:
+#    outputTTL: 604800
+
+inputs:
+  tagset:
+    type: File
+  fastadirs:
+    type:
+      type: array
+      items: Directory
+  refdirs:
+    type:
+      type: array
+      items: Directory
+  batchsize:
+    type: int
+  matchgenome_array:
+    type: string[]
+  regions_nestedarray:
+    type:
+      type: array
+      items:
+        type: array
+        items: [File, "null"]  # individual entries may be null
+  threads_array:
+    type: int[]
+  mergeoutput_array:
+    type: string[]
+  expandregions_array:
+    type: int[]
+  phenotypesdir:
+    type: Directory
+  chrs: string[]
+  snpeffdatadir: Directory
+  genomeversion_array: string[]
+  dbsnp:
+    type: File
+    secondaryFiles: [.csi]
+  gnomaddir: Directory
+  readmeinfo: string[]
+
+outputs:
+  stagednpydir:
+    type: Directory[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagednpydir
+  stagedonehotnpydir:
+    type: Directory[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedonehotnpydir
+  stagedannotationdir:
+    type:
+      type: array
+      items: [Directory, "null"]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedannotationdir
+  readme:
+    type: File[]
+    outputSource: lightning-slice-numpy-anno2vcf-wf/readme
+
+steps:
+  batch-dirs:
+    run: batch-dirs.cwl
+    in:
+      dirs: fastadirs
+      batchsize: batchsize
+    out: [batches]
+
+  lightning-import_data:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per batch produced by batch-dirs
+    in:
+      saveincomplete:
+        valueFrom: "false"
+      tagset: tagset
+      fastadirs: batch-dirs/batches
+    out: [lib]
+
+  lightning-import_refs:
+    run: lightning-import.cwl
+    scatter: fastadirs  # one import job per entry of refdirs
+    in:
+      saveincomplete:
+        valueFrom: "true"
+      tagset: tagset
+      fastadirs: refdirs
+    out: [lib]
+
+  lightning-slice:
+    run: lightning-slice.cwl
+    scatter: reflib  # one slice job per reference lib; datalibs is not scattered, so each job gets all data libs
+    in:
+      datalibs: lightning-import_data/lib
+      reflib: lightning-import_refs/lib
+    out: [libdir]
+
+  make-arrays:
+    run: make-arrays.cwl
+    in:
+      matchgenome_array: matchgenome_array
+      libdir_array: lightning-slice/libdir
+      genomeversion_array: genomeversion_array
+      regions_nestedarray: regions_nestedarray
+      threads_array: threads_array
+      mergeoutput_array: mergeoutput_array
+      expandregions_array: expandregions_array
+    out: [full_matchgenome_array, full_libdir_array, full_genomeversion_array, full_regions_array, full_threads_array, full_mergeoutput_array, full_expandregions_array, full_libname_array]
+
+  lightning-slice-numpy-anno2vcf-wf:
+    run: lightning-slice-numpy-anno2vcf-wf.cwl
+    scatter: [matchgenome, libdir, genomeversion, regions, threads, mergeoutput, expandregions, libname]
+    scatterMethod: dotproduct  # the eight scattered arrays are consumed index by index and must have equal length
+    in:
+      matchgenome: make-arrays/full_matchgenome_array
+      libdir: make-arrays/full_libdir_array
+      regions: make-arrays/full_regions_array
+      threads: make-arrays/full_threads_array
+      mergeoutput: make-arrays/full_mergeoutput_array
+      expandregions: make-arrays/full_expandregions_array
+      phenotypesdir: phenotypesdir
+      libname: make-arrays/full_libname_array
+      chrs: chrs
+      snpeffdatadir: snpeffdatadir
+      genomeversion: make-arrays/full_genomeversion_array
+      dbsnp: dbsnp
+      gnomaddir: gnomaddir
+      readmeinfo: readmeinfo
+    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir, readme]
diff --git a/cwl/lightning/lightning-anno2vcf.cwl b/cwl/lightning/lightning-anno2vcf.cwl
new file mode 100644 (file)
index 0000000..fb23d5c
--- /dev/null
@@ -0,0 +1,36 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper: runs `lightning anno2vcf` against an input directory and
+# returns the whole output directory as `vcfdir`.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - anno2vcf
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 64, ramMin: 500000}
+  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
+
+inputs:
+  annodir:
+    type: Directory
+
+outputs:
+  vcfdir:
+    type: Directory
+    # "." captures the output directory itself.
+    outputBinding: {glob: "."}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir=", valueFrom: "$(inputs.annodir)", separate: false}
+  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
diff --git a/cwl/lightning/lightning-choose-samples.cwl b/cwl/lightning/lightning-choose-samples.cwl
new file mode 100644 (file)
index 0000000..f03c585
--- /dev/null
@@ -0,0 +1,53 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper around `lightning choose-samples`; the result is the
+# samples.csv file found in the output directory.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - choose-samples
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 2, ramMin: 10000}
+  arv:RuntimeConstraints: {keep_cache: 20000, outputDirType: keep_output_dir}
+
+inputs:
+  matchgenome: {type: string}
+  libdir: {type: Directory}
+  phenotypesdir: {type: Directory}
+  trainingsetsize: {type: float}
+  randomseed: {type: int}
+
+outputs:
+  samplescsv:
+    type: File
+    outputBinding: {glob: "samples.csv"}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir=", valueFrom: "$(inputs.libdir)", separate: false}
+  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
+  - {prefix: "-match-genome=", valueFrom: "$(inputs.matchgenome)", separate: false}
+  # The phenotypes directory is what is handed to -case-control-file; the
+  # column name is fixed to "AD".
+  - {prefix: "-case-control-file=", valueFrom: "$(inputs.phenotypesdir)", separate: false}
+  - "-case-control-column=AD"
+  - {prefix: "-training-set-size=", valueFrom: "$(inputs.trainingsetsize)", separate: false}
+  - {prefix: "-random-seed=", valueFrom: "$(inputs.randomseed)", separate: false}
diff --git a/cwl/lightning/lightning-import.cwl b/cwl/lightning/lightning-import.cwl
new file mode 100644 (file)
index 0000000..693e153
--- /dev/null
@@ -0,0 +1,56 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper around `lightning import`: takes a tag set plus one or more
+# fasta directories and writes library.gob.gz, returned as `lib`.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - import
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement:
+    dockerPull: lightning
+    arv:dockerCollectionPDH: 1f430e6dd9b6be0ae78d4cffde9b1fef+892
+  ResourceRequirement: {coresMin: 96, ramMin: 670000}
+  arv:RuntimeConstraints: {keep_cache: 6200, outputDirType: keep_output_dir}
+
+inputs:
+  # Passed through verbatim as the value of -save-incomplete-tiles=.
+  saveincomplete: {type: string}
+  tagset: {type: File}
+  # Either a single Directory or an array of them.
+  fastadirs:
+    type: [Directory, {type: array, items: Directory}]
+
+outputs:
+  lib:
+    type: File
+    outputBinding: {glob: "*gob.gz"}
+
+arguments:
+  # Fixed options.
+  - "-local=true"
+  - "-loglevel=info"
+  - "-skip-ooo=true"
+  - "-output-tiles=true"
+  - "-batches=1"
+  - "-batch=0"
+  - {prefix: "-save-incomplete-tiles=", valueFrom: "$(inputs.saveincomplete)", separate: false}
+  # The remaining flags pass the value as a separate argument.
+  - {prefix: "-match-chromosome", valueFrom: "^(chr)?([0-9]+|X|Y|M)$"}
+  - {prefix: "-output-stats", valueFrom: "stats.json"}
+  - {prefix: "-tag-library", valueFrom: "$(inputs.tagset)"}
+  - {prefix: "-o", valueFrom: "library.gob.gz"}
+  # Positional: the fasta directory (or directories).
+  - $(inputs.fastadirs)
diff --git a/cwl/lightning/lightning-plot.cwl b/cwl/lightning/lightning-plot.cwl
new file mode 100644 (file)
index 0000000..cd4d198
--- /dev/null
@@ -0,0 +1,53 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper around `lightning plot`: writes
+# plot_<xcomponent>-<ycomponent>.png and returns the PNG found in the output
+# directory.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - plot
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 2, ramMin: 10000}
+  arv:RuntimeConstraints: {keep_cache: 20000, outputDirType: keep_output_dir}
+
+inputs:
+  pcanpy: {type: File}
+  pcasamplescsv: {type: File}
+  phenotypesdir: {type: Directory}
+  xcomponent: {type: string}
+  ycomponent: {type: string}
+
+outputs:
+  png:
+    type: File
+    outputBinding: {glob: "*.png"}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-i=", valueFrom: "$(inputs.pcanpy)", separate: false}
+  # Output file name is built from the two component inputs.
+  - {prefix: "-o=", valueFrom: "plot_$(inputs.xcomponent)-$(inputs.ycomponent).png", separate: false}
+  - {prefix: "-samples=", valueFrom: "$(inputs.pcasamplescsv)", separate: false}
+  - {prefix: "-phenotype=", valueFrom: "$(inputs.phenotypesdir)", separate: false}
+  - "-phenotype-cat1-column=7"
+  - {prefix: "-x=", valueFrom: "$(inputs.xcomponent)", separate: false}
+  - {prefix: "-y=", valueFrom: "$(inputs.ycomponent)", separate: false}
diff --git a/cwl/lightning/lightning-slice-numpy-anno2vcf-wf.cwl b/cwl/lightning/lightning-slice-numpy-anno2vcf-wf.cwl
new file mode 100644 (file)
index 0000000..874bc41
--- /dev/null
@@ -0,0 +1,113 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Per-library sub-workflow: produces numpy matrices and a one-hot
+# representation from a sliced library, converts the one-hot annotations to
+# VCF, annotates them, stages the results into named directories and
+# generates a readme.
+cwlVersion: v1.2
+class: Workflow
+requirements:
+  InlineJavascriptRequirement: {}
+  SubworkflowFeatureRequirement: {}
+  MultipleInputFeatureRequirement: {}
+
+inputs:
+  matchgenome: string
+  libdir: Directory
+  regions: File?
+  threads: int
+  mergeoutput: string
+  expandregions: int
+  phenotypesdir: Directory
+  libname: string
+  chrs: string[]
+  snpeffdatadir: Directory
+  genomeversion: string
+  dbsnp:
+    type: File
+    secondaryFiles: [.csi]
+  gnomaddir: Directory
+  readmeinfo: string[]
+
+outputs:
+  stagednpydir:
+    type: Directory
+    outputSource: stage-output/stagednpydir
+  stagedonehotnpydir:
+    type: Directory
+    outputSource: stage-output/stagedonehotnpydir
+  stagedannotationdir:
+    type: Directory?
+    outputSource: stage-output/stagedannotationdir
+  readme:
+    type: File
+    outputSource: genreadme/readme
+
+steps:
+  lightning-tiling-stats:
+    run: lightning-tiling-stats.cwl
+    # NOTE(review): `regions` is not listed under `in`, so `inputs.regions`
+    # is undefined in this expression and the condition is always true.
+    # Wiring `regions: regions` in would let the step be skipped, which in
+    # turn requires the consumer of `bed` to accept null -- confirm intent.
+    when: $(inputs.regions == null)
+    in:
+      libdir: libdir
+    out: [bed]
+
+  lightning-slice-numpy:
+    run: lightning-slice-numpy.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+    out: [outdir, npys, samplescsv, chunktagoffsetcsv]
+
+  lightning-slice-numpy-onehot:
+    run: lightning-slice-numpy-onehot.cwl
+    # NOTE(review): lightning-slice-numpy-onehot.cwl declares a required
+    # `samplescsv` File input and no `phenotypesdir` input; confirm which
+    # interface is intended and reconcile this step with the tool.
+    in:
+      matchgenome: matchgenome
+      libdir: libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      phenotypesdir: phenotypesdir
+    out: [outdir, npys, samplescsv]
+
+  lightning-anno2vcf-onehot:
+    run: lightning-anno2vcf.cwl
+    in:
+      annodir: lightning-slice-numpy-onehot/outdir
+    out: [vcfdir]
+
+  annotate-wf:
+    run: ../annotation/annotate-wf.cwl
+    in:
+      sample: libname
+      chrs: chrs
+      vcfdir: lightning-anno2vcf-onehot/vcfdir
+      snpeffdatadir: snpeffdatadir
+      genomeversion: genomeversion
+      dbsnp: dbsnp
+      gnomaddir: gnomaddir
+    out: [annotatedvcf, summary]
+
+  stage-output:
+    run: stage-output.cwl
+    in:
+      libname: libname
+      # merge_flattened turns the mix of File[] and File sources into one
+      # flat File[] per input.
+      npyfiles:
+        source: [lightning-slice-numpy/npys, lightning-slice-numpy/samplescsv, lightning-slice-numpy/chunktagoffsetcsv]
+        linkMerge: merge_flattened
+      onehotnpyfiles:
+        source: [lightning-slice-numpy-onehot/npys, lightning-slice-numpy-onehot/samplescsv]
+        linkMerge: merge_flattened
+      # stage-output.cwl declares a `pcapngs` File[] input, but no step in
+      # this workflow produces PCA plots; pass an empty list explicitly so
+      # the input is always bound.
+      pcapngs:
+        default: []
+      bed: lightning-tiling-stats/bed
+      annotatedvcf: annotate-wf/annotatedvcf
+      summary: annotate-wf/summary
+    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir]
+
+  genreadme:
+    run: genreadme.cwl
+    in:
+      samplescsv: lightning-slice-numpy/samplescsv
+      readmeinfo: readmeinfo
+    out: [readme]
diff --git a/cwl/lightning/lightning-slice-numpy-onehot.cwl b/cwl/lightning/lightning-slice-numpy-onehot.cwl
new file mode 100644 (file)
index 0000000..15ca547
--- /dev/null
@@ -0,0 +1,67 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper: `lightning slice-numpy` in single-onehot mode, with a fixed
+# chi2 p-value threshold (0.01) and minimum coverage (0.9).
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - slice-numpy
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 64, ramMin: 660000}
+  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
+
+inputs:
+  matchgenome: {type: string}
+  libdir: {type: Directory}
+  # Optional; when absent the -regions= argument evaluates to null.
+  regions: {type: "File?"}
+  threads: {type: int}
+  mergeoutput: {type: string}
+  expandregions: {type: int}
+  samplescsv: {type: File}
+
+outputs:
+  # The output directory itself.
+  outdir:
+    type: Directory
+    outputBinding: {glob: "."}
+  # Every file in the output directory whose name ends in "npy".
+  npys:
+    type: File[]
+    outputBinding: {glob: "*npy"}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir=", valueFrom: "$(inputs.libdir)", separate: false}
+  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
+  - {prefix: "-match-genome=", valueFrom: "$(inputs.matchgenome)", separate: false}
+  - {prefix: "-regions=", valueFrom: "$(inputs.regions)", separate: false}
+  - {prefix: "-threads=", valueFrom: "$(inputs.threads)", separate: false}
+  - {prefix: "-merge-output=", valueFrom: "$(inputs.mergeoutput)", separate: false}
+  - {prefix: "-expand-regions=", valueFrom: "$(inputs.expandregions)", separate: false}
+  - {prefix: "-samples=", valueFrom: "$(inputs.samplescsv)", separate: false}
+  # One-hot specific, fixed options.
+  - "-single-onehot=true"
+  - "-chi2-p-value=0.01"
+  - "-min-coverage=0.9"
diff --git a/cwl/lightning/lightning-slice-numpy-pca.cwl b/cwl/lightning/lightning-slice-numpy-pca.cwl
new file mode 100644 (file)
index 0000000..1e85f04
--- /dev/null
@@ -0,0 +1,75 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper: `lightning slice-numpy` in PCA mode. Returns the output
+# directory plus the pca.npy and samples.csv files found in it.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - slice-numpy
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 64, ramMin: 1500000}
+  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
+
+inputs:
+  matchgenome: {type: string}
+  libdir: {type: Directory}
+  # Optional; when absent the -regions= argument evaluates to null.
+  regions: {type: "File?"}
+  threads: {type: int}
+  mergeoutput: {type: string}
+  expandregions: {type: int}
+  samplescsv: {type: File}
+  pcacomponents: {type: int}
+
+outputs:
+  # The output directory itself.
+  outdir:
+    type: Directory
+    outputBinding: {glob: "."}
+  pcanpy:
+    type: File
+    outputBinding: {glob: "pca.npy"}
+  pcasamplescsv:
+    type: File
+    outputBinding: {glob: "samples.csv"}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir=", valueFrom: "$(inputs.libdir)", separate: false}
+  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
+  - {prefix: "-match-genome=", valueFrom: "$(inputs.matchgenome)", separate: false}
+  - {prefix: "-regions=", valueFrom: "$(inputs.regions)", separate: false}
+  - {prefix: "-threads=", valueFrom: "$(inputs.threads)", separate: false}
+  - {prefix: "-merge-output=", valueFrom: "$(inputs.mergeoutput)", separate: false}
+  - {prefix: "-expand-regions=", valueFrom: "$(inputs.expandregions)", separate: false}
+  - {prefix: "-samples=", valueFrom: "$(inputs.samplescsv)", separate: false}
+  # PCA specific options; only the component count is configurable.
+  - "-pca=true"
+  - {prefix: "-pca-components=", valueFrom: "$(inputs.pcacomponents)", separate: false}
+  - "-min-coverage=0.98"
+  - "-max-pca-tiles=100000"
diff --git a/cwl/lightning/lightning-slice-numpy.cwl b/cwl/lightning/lightning-slice-numpy.cwl
new file mode 100644 (file)
index 0000000..9cd8452
--- /dev/null
@@ -0,0 +1,68 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper: plain `lightning slice-numpy`. Returns the output directory,
+# the matrix.*.npy files and chunk-tag-offset.csv.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - slice-numpy
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement: {dockerPull: lightning}
+  ResourceRequirement: {coresMin: 64, ramMin: 660000}
+  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
+
+inputs:
+  matchgenome: {type: string}
+  libdir: {type: Directory}
+  # Both File inputs below are optional; a missing one makes its argument
+  # evaluate to null.
+  regions: {type: "File?"}
+  threads: {type: int}
+  mergeoutput: {type: string}
+  expandregions: {type: int}
+  samplescsv: {type: "File?"}
+
+outputs:
+  # The output directory itself.
+  outdir:
+    type: Directory
+    outputBinding: {glob: "."}
+  npys:
+    type: File[]
+    outputBinding: {glob: "matrix.*.npy"}
+  chunktagoffsetcsv:
+    type: File
+    outputBinding: {glob: "chunk-tag-offset.csv"}
+
+# Flags are emitted in -name=value form (separate: false glues prefix and value).
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir=", valueFrom: "$(inputs.libdir)", separate: false}
+  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
+  - {prefix: "-match-genome=", valueFrom: "$(inputs.matchgenome)", separate: false}
+  - {prefix: "-regions=", valueFrom: "$(inputs.regions)", separate: false}
+  - {prefix: "-threads=", valueFrom: "$(inputs.threads)", separate: false}
+  - {prefix: "-merge-output=", valueFrom: "$(inputs.mergeoutput)", separate: false}
+  - {prefix: "-expand-regions=", valueFrom: "$(inputs.expandregions)", separate: false}
+  - {prefix: "-samples=", valueFrom: "$(inputs.samplescsv)", separate: false}
diff --git a/cwl/lightning/lightning-slice.cwl b/cwl/lightning/lightning-slice.cwl
new file mode 100644 (file)
index 0000000..d30edb1
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper around `lightning slice`: takes the data libraries and one
+# reference library as positional arguments and returns the output
+# directory as `libdir`.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - slice
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement:
+    dockerPull: lightning
+    arv:dockerCollectionPDH: 1f430e6dd9b6be0ae78d4cffde9b1fef+892
+  ResourceRequirement: {coresMin: 96, ramMin: 660000}
+  arv:RuntimeConstraints: {keep_cache: 6200, outputDirType: keep_output_dir}
+
+inputs:
+  datalibs:
+    type: File[]
+  reflib:
+    type: File
+
+outputs:
+  libdir:
+    type: Directory
+    # "." captures the output directory itself.
+    outputBinding: {glob: "."}
+
+arguments:
+  - "-local=true"
+  # Flag and value are passed as two separate arguments here.
+  - {prefix: "-output-dir", valueFrom: "$(runtime.outdir)"}
+  # Positional: all data libraries first, then the reference library.
+  - $(inputs.datalibs)
+  - $(inputs.reflib)
diff --git a/cwl/lightning/lightning-tiling-stats.cwl b/cwl/lightning/lightning-tiling-stats.cwl
new file mode 100644 (file)
index 0000000..062d256
--- /dev/null
@@ -0,0 +1,35 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tool wrapper around `lightning tiling-stats`: reads a library directory and
+# returns the file matching "*bed" from the output directory.
+cwlVersion: v1.2
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+
+baseCommand:
+  - lightning
+  - tiling-stats
+
+requirements:
+  NetworkAccess: {networkAccess: true}
+
+hints:
+  DockerRequirement:
+    dockerPull: lightning
+    arv:dockerCollectionPDH: 1f430e6dd9b6be0ae78d4cffde9b1fef+892
+  ResourceRequirement: {coresMin: 2, ramMin: 8000}
+  arv:RuntimeConstraints: {keep_cache: 10000, outputDirType: keep_output_dir}
+
+inputs:
+  libdir:
+    type: Directory
+
+outputs:
+  bed:
+    type: File
+    outputBinding: {glob: "*bed"}
+
+# Flag and value are passed as two separate arguments for both options.
+arguments:
+  - "-local=true"
+  - {prefix: "-input-dir", valueFrom: "$(inputs.libdir)"}
+  - {prefix: "-output-dir", valueFrom: "$(runtime.outdir)"}
diff --git a/cwl/lightning/make-arrays.cwl b/cwl/lightning/make-arrays.cwl
new file mode 100644 (file)
index 0000000..e1ab065
--- /dev/null
@@ -0,0 +1,63 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool that expands the per-run parameter arrays into parallel,
+# equal-length "full_*" arrays: one entry for every combination of
+# matchgenome, library and region slot. Library-level values are looked up by
+# library index, region-level values by region index.
+cwlVersion: v1.2
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+
+inputs:
+  matchgenome_array: string[]
+  libdir_array: Directory[]
+  genomeversion_array: string[]
+  # One inner array of (possibly null) region files per library.
+  regions_nestedarray:
+    type: {type: array, items: {type: array, items: [File, "null"]}}
+  threads_array: int[]
+  mergeoutput_array: string[]
+  expandregions_array: int[]
+
+outputs:
+  full_matchgenome_array: string[]
+  full_libdir_array: Directory[]
+  full_genomeversion_array: string[]
+  full_regions_array:
+    type: {type: array, items: [File, "null"]}
+  full_threads_array: int[]
+  full_mergeoutput_array: string[]
+  full_expandregions_array: int[]
+  full_libname_array: string[]
+
+expression: |
+  ${
+    // One accumulator per output port; every iteration appends exactly one
+    // element to each, so all arrays stay aligned.
+    var out = {
+      "full_matchgenome_array": [],
+      "full_libdir_array": [],
+      "full_genomeversion_array": [],
+      "full_regions_array": [],
+      "full_threads_array": [],
+      "full_mergeoutput_array": [],
+      "full_expandregions_array": [],
+      "full_libname_array": []
+    };
+    inputs.matchgenome_array.forEach(function (matchgenome) {
+      inputs.libdir_array.forEach(function (libdir, libidx) {
+        var genomeversion = inputs.genomeversion_array[libidx];
+        inputs.regions_nestedarray[libidx].forEach(function (regions, regidx) {
+          out.full_matchgenome_array.push(matchgenome);
+          out.full_libdir_array.push(libdir);
+          out.full_genomeversion_array.push(genomeversion);
+          out.full_regions_array.push(regions);
+          out.full_threads_array.push(inputs.threads_array[regidx]);
+          out.full_mergeoutput_array.push(inputs.mergeoutput_array[regidx]);
+          out.full_expandregions_array.push(inputs.expandregions_array[regidx]);
+          // Library name: <genomeversion><matchgenome>_library
+          out.full_libname_array.push(genomeversion + matchgenome + "_library");
+        });
+      });
+    });
+    return out;
+  }
diff --git a/cwl/lightning/make-fastadirs.cwl b/cwl/lightning/make-fastadirs.cwl
new file mode 100644 (file)
index 0000000..e0ad3d5
--- /dev/null
@@ -0,0 +1,35 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool that packs an array of file groups into Directory objects
+# named dir0, dir1, ... Each directory receives the first two files of up to
+# 100 consecutive groups.
+cwlVersion: v1.2
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  LoadListingRequirement: {loadListing: shallow_listing}
+
+inputs:
+  fas:
+    type: {type: array, items: {type: array, items: File}}
+
+outputs:
+  fastadirs: Directory[]
+
+expression: |
+  ${
+    var chunk = 100;
+    var fastadirs = [];
+    for (var start = 0; start < inputs.fas.length; start += chunk) {
+      var listing = [];
+      // slice() clips at the end of the array, so the last directory may
+      // hold fewer than `chunk` groups.
+      inputs.fas.slice(start, start + chunk).forEach(function (pair) {
+        listing.push(pair[0]);
+        listing.push(pair[1]);
+      });
+      fastadirs.push({
+        "class": "Directory",
+        "basename": "dir" + String(start / chunk),
+        "listing": listing
+      });
+    }
+    return {"fastadirs": fastadirs};
+  }
diff --git a/cwl/lightning/make-libname.cwl b/cwl/lightning/make-libname.cwl
new file mode 100644 (file)
index 0000000..c9dc33f
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool that builds a library name of the form
+# <genomeversion><matchgenome>_library.
+cwlVersion: v1.2
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+
+inputs:
+  matchgenome: {type: string}
+  genomeversion: {type: string}
+
+outputs:
+  libname: {type: string}
+
+expression: |
+  ${
+    return {"libname": inputs.genomeversion + inputs.matchgenome + "_library"};
+  }
diff --git a/cwl/lightning/readme.md b/cwl/lightning/readme.md
new file mode 100644 (file)
index 0000000..0341864
--- /dev/null
@@ -0,0 +1,17 @@
+Running tiling workflow
+===
+
+Command
+---
+
+arvados-cwl-runner --submit --no-wait --project-uuid <project_uuid> fasta2numpy-wf.cwl <input_yml>
+
+For examples of input yml files, see yml/fasta2numpy-wf-100test.yml and yml/fasta2numpy-wf-0831_0315.yml
+
+Notable parameters for input yml
+---
+
+fastadirs: an array of fasta directories, in our implementation, each directory consists of around 100 fasta pairs
+batchsize: an integer determining the batch size when running the lightning-import step, e.g., for batchsize 12, we run lightning-import for 12 fasta directories together as a batch; the resulting libraries then get merged by lightning-slice
+matchgenome: a string pattern used for obtaining a subset of the cohort, e.g., matchgenome "ADNI|WCAP" runs tiling for all samples with "ADNI" or "WCAP" in their name; matchgenome "" runs for the entire cohort
+trainingsetsize: a float between 0 and 1 to determine the training set size
diff --git a/cwl/lightning/src/genreadme.py b/cwl/lightning/src/genreadme.py
new file mode 100755 (executable)
index 0000000..f5fc689
--- /dev/null
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+from __future__ import print_function
+import os
+import sys
+
+text = """h1. Data release readme
+
+Data release candidate: {}
+Description: This dataset contains {} human genomes ({}) encoded using the Lightning tiling system for the AI4AD project. It is published at {}. 
+
+Collection contains:
+* library_full/ -- Full Tiled Data Set
+** matrix.0000.npy, matrix.0001.npy, matrix.0002.npy, ... -- tile variant# for each (sample, tag)
+** chunk-tag-offset.csv -- tag offset for each matrix.NNNN.npy file
+** samples.csv --  sample ID for each row of matrix.NNNN.npy
+* library_filtered/ -- Filtered Tiled Data Set (filtered using chi-square test between tile variants and AD phenotype)
+** onehot.npy -- one-hot representation of tiled data filtered by p-value
+** onehot-columns.npy -- tag, variant, het/hom, p-value for each column of onehot.npy
+** samples.csv -- sample ID for each row of onehot.npy
+* GRCh38.86_library_annotation/ -- Annotations for Tiled Data Set
+** GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz -- annotations for each genomic variant found in tiled dataset
+** GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz.tbi -- index for annotations vcf
+** GRCh38.86_library_summary.txt -- % of variants in each chromosome that were found in gnomad
+** hg38.fa.gz.bed -- position of tile set in reference genome
+
+Tiling Background:
+
+Tiling abstracts a called genome by partitioning it into overlapping variable length shorter sequences, known as tiles. A tile is a genomic sequence that is braced on either side by 24 base (24-mer) "tags".
+
+A tile sequence must be at least 248 base pairs long where each tile is labeled with a "position" according to the number of tiles before it. One tile position can have multiple tile variants, one for each sequence observed at that position. When a variation occurs on a tag, we allow tile variants to span multiple steps where the tags would normally end. These tiles that span multiple steps are known as "spanning tiles"
+
+Our choice of tags ("tag-set") partition the human reference genome into 10,655,006 tiles, composed of 3.1 billion bases (with an average of around 315 bases per tile). The set of all positions and tile variants are stored in is what we call the tile library. An individual's genome can then be easily represented as an array of tag sets that reference tiles in the tile library. Each position in the array corresponds to a tile position and points to the tile variant observed at that position for that individual.
+
+To create the tiled genomes, we use Lightning, a system that allows for efficient access to large scale population genomic data with a focus on clinical and research use. The Lightning system is a combination of a conceptual way to think about genomes (genomic tiling), the internal representation of genomes for efficient access, and the software that manages access to the data.
+
+h2. Read me for library_full
+
+Directory:  library_full/
+
+Files:
+
+* matrix.XXX.npy:  numpy-encoded matrix with one row per genome, and a pair of columns per tag / tile position (one for each allele). Each matrix element is an integer. For easier loading, the numpy matrix is broken into chunks. : 
+** -1 indicates a "low quality" tile variant containing no-calls.
+** 0 indicates the tag for this tile was not found, i.e., this part of the genome is covered by a spanning tile in an earlier (leftward) column.
+** Tile variants can span multiple tile positions  if a tag is not found and are known as spanning tiles. 
+** 1 indicates the most common high quality variant of this tile in this dataset; 2 indicates the 2nd most common; etc.
+
+* chunk-tag-offset.csv - common separated text file that indicates tag offset for each matrix.NNNN.npy file
+** Columns are file name and offset
+
+* samples.csv: mapping from numpy file (matrix.npy) and row number to input ID for each tiled genome
+** Columns are row number, genome ID (usually taken from tile name of gvcf/vcf, and name of npy output
+        - Example: 0,"A-WCAP-WC000711-BL-COL-39141BL1","matrix.npy"
+
+
+h2. Read me for library_filtered
+
+Directory: library_filtered/
+Files:
+* onehot.npy -- 
+**  The tile variants have been filtered using a chi2 filter between each separate tile variant and the AD phenotype. Only tile positions with 90% coverage are included (i.e. 90% of the tile variants in a tile position do not contain no-calls).  
+** Contains the positions of the non-zeros elements of the filtered sparse matrix.: two rows: 1) row position 2) column position
+** This sparse numpy-encoded matrix has one row per genome, and a pair of columns per tile variant. One column represents the heterozygous tile variant (i.e. tile variant found in 1 allele) and one for homozygous tile variant (i.e. tile variant found in 2 alleles). Each matrix element is an integer with a 1 indicating the tile variant is present in that form and a 0 indicating the tile variant is not present in that format.
+** Can create a sparse matrix with the following commands in python:
+
+import numpy as np
+from scipy.sparse import csr_matrix
+
+Xrc = np.load('onehot.npy')
+data = np.ones(Xrc[0,:].shape)
+row_ind = Xrc[0,:]
+col_ind = Xrc[1,:]
+filtered = csr_matrix((data, (row_ind, col_ind)))
+    
+* onehot-columns.npy -
+numpy file containing information corresponding to each column of the one-hot matrix representation of the filtered data.
+Columns are as follows: tag, tile variant, zygosity with heterozygous = 0 and homozygous = 1, p-value * 1e6 for each column of onehot.npy
+* samples.csv -mapping from numpy file (matrix.npy) and row number to input ID for each tiled genome
+** Columns are row number, genome ID (usually taken from tile name of gvcf/vcf, and name of npy output
+    - Example: 0,"A-WCAP-WC000711-BL-COL-39141BL1","matrix.npy"
+
+
+h2. Read me for annotations
+
+Directory: GRCh38.86_library_annotation/
+
+Files:
+* GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz
+** gzipped vcf of each genomic variant found in tile variants containing frequencies and other annotation details (gene, predicted effects, etc) from dbsnp and gnmad.  
+** ID contains both HGVS and rsID (if found) and INFO contains tile variant (TV: tileposition-tilevariant) as well as the other annotations. All tiles variants contains that genomic variant are listed in the TV field. 
+- Example: 
+- #CHROM       POS     ID      REF     ALT     QUAL    FILTER  INFO
+- chr9 45079   chr9:g.45080del;rs55984476      TC      T       .       .       TV=,5649728-1,;ANN=T|intergenic_region|MODIFIER|FAM138C-PGM5P3-AS1|ENSG00000218839-ENSG00000277631|intergenic_region|ENSG00000218839-ENSG00000277631|||n.45080delC||||||;AC=129535;AN=129536;AF=0.999992;AF_afr=0.999966;AF_amr=1;AF_asj=1;AF_eas=1;AF_fin=1;AF_nfe=1;AF_oth=1
+** In this annotation file, for simplicity the name of the chromosome is used instead of the proper HGVS annotation for the reference and chromosome. If you want to search the HGVS annotation you will need to replace it. 
+        - Example: chr3:g.36130213T>A -> NC_000003.12:g.36130213T>A
+        - Example: chr10:g.13511587G>A -> NC_000010.12:g.13511587G>A
+
+* GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz.tbi
+** index file for vcf of annotations
+
+* GRCh38.86_library_summary.txt 
+** text file containing % of variants in each chromosome that were found in gnomad
+* GRCh38.86_reference_tiles.bed
+** bed file containing tile locations on GRCh38 for reference. 
+** The columns are as follows:
+** 1) Chromosome
+** 2) Tile start (including tag)
+** 3) Tile end (including tag)
+** 4) Tag #
+** 5) Coverage (this gives a score 0-1000 of how many times this tile is placed in a set of genomes, 1000 means the tag is found in every genome of the set. 0 indicates the tag is not found in any of the genomes.  Tag may not be placed due to variants or no-calls existing on the tag. 
+** 6) Strand (always ., included so that our bed file maintains the bed standard format)
+** 7) Tile start (not including tag)
+** 8) Tile end (not including tag
+- Example: 
+M 0 467 10654109 870 . 0 443
+M 443 959 10654110 895 . 467 935
+M 935 1394 10654111 985 . 959 1370
+"""
+
+def count_samples(samplescsv):
+  count = 0
+  with open(samplescsv) as f:
+    for line in f:
+      if line != "\n":
+        count += 1
+  return count
+
+def main():
+  samplescsv = sys.argv[1]
+  date = sys.argv[2]
+  description = sys.argv[3]
+  projecturl = sys.argv[4]
+
+  cohortsize = count_samples(samplescsv)
+  print(text.format(date, cohortsize, description, projecturl))
+
+if __name__ == '__main__':
+  main()
diff --git a/cwl/lightning/stage-output.cwl b/cwl/lightning/stage-output.cwl
new file mode 100644 (file)
index 0000000..362e4dd
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool that groups result files into three named Directory
+# objects: library_full, library_filtered and <libname>_annotation. Nothing
+# is executed; only the directory listings are assembled.
+cwlVersion: v1.2
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  libname: string
+  npyfiles: File[]
+  onehotnpyfiles: File[]
+  # Optional: callers that produce no PCA plots may leave this unset.
+  pcapngs: File[]?
+  # Optional: null when the step producing the bed file did not run.
+  bed: File?
+  annotatedvcf: File
+  summary: File
+outputs:
+  stagednpydir: Directory
+  stagedonehotnpydir: Directory
+  stagedannotationdir: Directory
+expression: |
+  ${
+    var stagednpydir = {"class": "Directory",
+                        "basename": "library_full",
+                        "listing": inputs.npyfiles};
+    var stagedonehotnpydir = {"class": "Directory",
+                              "basename": "library_filtered",
+                              "listing": inputs.onehotnpyfiles};
+    // Work on a copy so the input array is not modified, and tolerate a
+    // missing pcapngs input.
+    var annotationlist = (inputs.pcapngs || []).slice();
+    // Append the single-file inputs in a fixed order, skipping any that
+    // were not provided.
+    [inputs.bed, inputs.annotatedvcf, inputs.summary].forEach(function (f) {
+      if (f != null) {
+        annotationlist.push(f);
+      }
+    });
+    var stagedannotationdir = {"class": "Directory",
+                                "basename": inputs.libname+"_annotation",
+                                "listing": annotationlist};
+    return {"stagednpydir": stagednpydir, "stagedonehotnpydir": stagedonehotnpydir, "stagedannotationdir": stagedannotationdir};
+  }
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml
new file mode 100644 (file)
index 0000000..97108e6
--- /dev/null
@@ -0,0 +1,353 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input values; presumably for the fasta2numpy-multi-wf workflow, going
+# by this file's name -- confirm against the workflow definition.
+# Each *_array parameter holds exactly one element in this file.
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 12
+# Empty string; presumably means no genome-name filter is applied -- confirm.
+matchgenome_array: [""]
+# One inner list whose only entry is null.
+regions_nestedarray:
+  - - null
+threads_array: [10]
+# NOTE: this is the quoted string "false", not a YAML boolean.
+mergeoutput_array: ["false"]
+expandregions_array: [0]
+# NOTE(review): unlike the other locations in this file (hash+size form), this
+# one looks like a collection UUID -- confirm that is intended.
+phenotypesdir:
+  class: Directory
+  location: keep:2xpu4-4zz18-yq0njnojx7kaj34
+# Three strings; they look like a date, a free-text description and a project URL.
+readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+# chr1 through chr22, then chrX and chrY.
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+# Presumably the SnpEff database name matching snpeffdatadir -- confirm.
+genomeversion_array: ["GRCh38.86"]
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+fastadirs:
+  - class: Directory
+    location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479
+  - class: Directory
+    location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479
+  - class: Directory
+    location: keep:6a9d10762c98f71467957995f43a3227+138493
+  - class: Directory
+    location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469
+  - class: Directory
+    location: keep:e7a60ec6ce207229bb5dd628882b7145+138497
+  - class: Directory
+    location: keep:01d219030897f7e1550c5aa5d4df7a35+137556
+  - class: Directory
+    location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959
+  - class: Directory
+    location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107
+  - class: Directory
+    location: keep:64ca59668a2dcf61f351afcf350d42e7+138471
+  - class: Directory
+    location: keep:62d89d039fad7221f16c34b722e097af+138465
+  - class: Directory
+    location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503
+  - class: Directory
+    location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459
+  - class: Directory
+    location: keep:0eaa03f63440c825bc722022eac85448+138463
+  - class: Directory
+    location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457
+  - class: Directory
+    location: keep:cbf21c73503b4cb792b5231c46358ba3+138523
+  - class: Directory
+    location: keep:11683cd5f820ff9d42d3888bbb00221f+138205
+  - class: Directory
+    location: keep:8b09dae12292f375aa98de6f36be53c2+138491
+  - class: Directory
+    location: keep:8d02abcdef3e1b3ab910303df4331399+137649
+  - class: Directory
+    location: keep:c3884ba793bf5771ba7267246f747893+138479
+  - class: Directory
+    location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479
+  - class: Directory
+    location: keep:fa72acb0219d6833f90beb1e3874e485+138481
+  - class: Directory
+    location: keep:01e5edc00295926a91351e8069cb144f+138477
+  - class: Directory
+    location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048
+  - class: Directory
+    location: keep:66d688c08060cd9750fb6e46cce3fa39+138479
+  - class: Directory
+    location: keep:9c78e31f255406bee37d8960ce79c185+138471
+  - class: Directory
+    location: keep:100591a332d9f256842f086fa92177dc+138735
+  - class: Directory
+    location: keep:ef10e9c7591c5466d5791666f830bf06+138725
+  - class: Directory
+    location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687
+  - class: Directory
+    location: keep:ffa0d2888d14b2940562801bc163d4b2+138667
+  - class: Directory
+    location: keep:98854a1e4db58922e5e712507217e2b5+138483
+  - class: Directory
+    location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479
+  - class: Directory
+    location: keep:381b930fc4059b819bda2c414927deaf+138477
+  - class: Directory
+    location: keep:558b093245095d1029f4cbe1f2726e99+138056
+  - class: Directory
+    location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479
+  - class: Directory
+    location: keep:95aba9cd1cce2de6fbc6c5851c81387d+138479
+  - class: Directory
+    location: keep:94328ab7b661267810f9f0197eae70da+138479
+  - class: Directory
+    location: keep:666124ae8a9a6734bba5de48490748c0+138479
+  - class: Directory
+    location: keep:98359aa9baa931eea204298f7b26563c+138479
+  - class: Directory
+    location: keep:c061711a7b7e7d8acddfde36e785cc77+138479
+  - class: Directory
+    location: keep:ac21ed8a0e6af91debc39ddee1197787+138503
+  - class: Directory
+    location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357
+  - class: Directory
+    location: keep:0e6d1ff3738abd240efaca9079f62f46+138851
+  - class: Directory
+    location: keep:ed82b93d0ca76ffd666457399c2462cd+138101
+  - class: Directory
+    location: keep:7efb683d9556c87c69fb1cbbaa290820+137674
+  - class: Directory
+    location: keep:377b8665f04091581e560614ba20ca46+138515
+  - class: Directory
+    location: keep:da7207cc977b12f60372b742785eec62+138491
+  - class: Directory
+    location: keep:54403688627695480b373eb354dc1c7f+138479
+  - class: Directory
+    location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489
+  - class: Directory
+    location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093
+  - class: Directory
+    location: keep:661cd8f54deabacb49ec78db93ed4578+138479
+  - class: Directory
+    location: keep:b326e6b7773354041b03dfe4564ffd0b+137095
+  - class: Directory
+    location: keep:853632bf7879231f9ac24096e52f95e1+138479
+  - class: Directory
+    location: keep:f190f2167bcd4fe819878efcde43e6f0+138059
+  - class: Directory
+    location: keep:51f70f5861f61aca2e441e2c5d282150+138479
+  - class: Directory
+    location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479
+  - class: Directory
+    location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479
+  - class: Directory
+    location: keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477
+  - class: Directory
+    location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485
+  - class: Directory
+    location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580
+  - class: Directory
+    location: keep:f0322067b85babcae2ecdd19d34f7597+138478
+  - class: Directory
+    location: keep:47aa083e8d0a6c4c297d036165902282+138478
+  - class: Directory
+    location: keep:9f9d01fd058a06f30955f6b9709194fb+138531
+  - class: Directory
+    location: keep:f382e81786847d7395684e858786fc62+138479
+  - class: Directory
+    location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479
+  - class: Directory
+    location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059
+  - class: Directory
+    location: keep:6346d82f0746faee45f5ba194dd25d0b+138479
+  - class: Directory
+    location: keep:046da89ef991cc019f5092c326e2b8f8+138479
+  - class: Directory
+    location: keep:52bdef8f26c69eb338d5d94b08cce125+138479
+  - class: Directory
+    location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599
+  - class: Directory
+    location: keep:fa21ef0cd50701250a927b4993df53e7+138297
+  - class: Directory
+    location: keep:a333d969b2b84dfe7a2abb5889763c26+138675
+  - class: Directory
+    location: keep:5fcb253d3584035246f5b4bcd60fa348+137291
+  - class: Directory
+    location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209
+  - class: Directory
+    location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402
+  - class: Directory
+    location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783
+  - class: Directory
+    location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879
+  - class: Directory
+    location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187
+  - class: Directory
+    location: keep:55ad863fef7d8dccf4db32f282f752c4+135119
+  - class: Directory
+    location: keep:30e016d511a2443985d2b051638a419b+138047
+  - class: Directory
+    location: keep:2711053e417e15034cae0c8aec568a45+138352
+  - class: Directory
+    location: keep:020cddfa8082c5476c011aaa189cf518+138479
+  - class: Directory
+    location: keep:5bb2df76b0869bea8833784a934f7021+138479
+  - class: Directory
+    location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479
+  - class: Directory
+    location: keep:6e1bf98f088ced1ef8b60f562374325b+138479
+  - class: Directory
+    location: keep:153d189544f36dd39610087a782589ca+138479
+  - class: Directory
+    location: keep:ff71aa46f8f4134f1556d4b19d0b9307+138058
+  - class: Directory
+    location: keep:e996083281665242208b0773b0d6287c+138058
+  - class: Directory
+    location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479
+  - class: Directory
+    location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479
+  - class: Directory
+    location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479
+  - class: Directory
+    location: keep:e08ebbc428bc0604e07f72b21410ce58+138479
+  - class: Directory
+    location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479
+  - class: Directory
+    location: keep:f4318c61400198c3d44895e184e79b4c+138479
+  - class: Directory
+    location: keep:5a0d263b7469871268e31919899e6103+138445
+  - class: Directory
+    location: keep:0c5a32dc706c195619b3f84bac5d3978+136353
+  - class: Directory
+    location: keep:3843ae092703f6696cb5f42f464f4583+134452
+  - class: Directory
+    location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783
+  - class: Directory
+    location: keep:570f09f532cbd74146bd8ba3cc363263+137749
+  - class: Directory
+    location: keep:ec659571f3a78e63e457797fc58f1828+137677
+  - class: Directory
+    location: keep:60da5cf26a882d1b4e024326cd7d893b+130004
+  - class: Directory
+    location: keep:13f6862e9770f63bd44b6ef539541dc3+138079
+  - class: Directory
+    location: keep:83fb84b000e58c16fe17adef5de277b1+138079
+  - class: Directory
+    location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079
+  - class: Directory
+    location: keep:fc49628c14a44abf3e54c4a956aa5888+136845
+  - class: Directory
+    location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089
+  - class: Directory
+    location: keep:c08e22c6705b07111205c0edc2c3872d+136586
+  - class: Directory
+    location: keep:c577425ff9f3a0d886f3426065a72ca4+137691
+  - class: Directory
+    location: keep:ddf235932a0e2176593aac5a4ac42861+136337
+  - class: Directory
+    location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731
+  - class: Directory
+    location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879
+  - class: Directory
+    location: keep:838fd46abf759e68bdc17f29d1680752+138063
+  - class: Directory
+    location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665
+  - class: Directory
+    location: keep:3dfbeda6c564f9f275f74c694022b52d+135167
+  - class: Directory
+    location: keep:94de36359231b01ef2e277083bfe5287+137677
+  - class: Directory
+    location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845
+  - class: Directory
+    location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170
+  - class: Directory
+    location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678
+  - class: Directory
+    location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679
+  - class: Directory
+    location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665
+  - class: Directory
+    location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362
+  - class: Directory
+    location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859
+  - class: Directory
+    location: keep:9acd969c8b08cedce1fd8d769d214b75+137559
+  - class: Directory
+    location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977
+  - class: Directory
+    location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161
+  - class: Directory
+    location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569
+  - class: Directory
+    location: keep:dfed457efee3414f1c1f7536fe83eed3+109552
+  - class: Directory
+    location: keep:14388f62c868254149d5972c7d80d28d+97366
+  - class: Directory
+    location: keep:5e99d1ef6b90001e01450ffe093e9493+135278
+  - class: Directory
+    location: keep:153533e237092985535d7f9b0a4a354e+135278
+  - class: Directory
+    location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278
+  - class: Directory
+    location: keep:281ec2620a88b7274b44ece0ca96b543+135278
+  - class: Directory
+    location: keep:9d2dfc3510b82e5249045e71862a44d9+135278
+  - class: Directory
+    location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194
+  - class: Directory
+    location: keep:d80d9356d64396465ca61585b4c3031b+135419
+  - class: Directory
+    location: keep:881e67efac709867dafc480116c2edf1+138479
+  - class: Directory
+    location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479
+  - class: Directory
+    location: keep:6a671736ca4bf130e3ab507b702024bc+136672
+  - class: Directory
+    location: keep:504c77a1298ff2649af35527dec8b467+138058
+  - class: Directory
+    location: keep:e19f2c238e685f62f52c58eb4c44d703+134867
+  - class: Directory
+    location: keep:afad6e6fd6933d8919a4a39712e80afe+67509
+  - class: Directory
+    location: keep:09771439ce1431d168b15bb36cc8cacb+136998
+  - class: Directory
+    location: keep:cb3b59ec7468d27ef643380568f945e4+138679
+  - class: Directory
+    location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679
+  - class: Directory
+    location: keep:52d413d47ebc5abaeda098254ce6d517+138629
+  - class: Directory
+    location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059
+  - class: Directory
+    location: keep:d081f62909038dd25c499972547ced53+138479
+  - class: Directory
+    location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975
+  - class: Directory
+    location: keep:9882f01b1cc6172959a3efab49d89397+78041
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml
new file mode 100644 (file)
index 0000000..a2aa729
--- /dev/null
@@ -0,0 +1,353 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input values; presumably for the fasta2numpy-multi-wf workflow, going
+# by this file's name -- confirm against the workflow definition.
+# Each *_array parameter holds exactly one element in this file.
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 12
+# Presumably restricts processing to genomes matching "ADNI" (readmeinfo
+# below says "ADNI only") -- confirm against the workflow.
+matchgenome_array: ["ADNI"]
+# One inner list whose only entry is null.
+regions_nestedarray:
+  - - null
+threads_array: [10]
+# NOTE: this is the quoted string "false", not a YAML boolean.
+mergeoutput_array: ["false"]
+expandregions_array: [0]
+# NOTE(review): unlike the other locations in this file (hash+size form), this
+# one looks like a collection UUID -- confirm that is intended.
+phenotypesdir:
+  class: Directory
+  location: keep:2xpu4-4zz18-yq0njnojx7kaj34
+# Three strings; they look like a date, a free-text description and a project URL.
+readmeinfo: ["2022-03-22", "called using VCPA1.1 pipeline, ADNI only", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+# chr1 through chr22, then chrX and chrY.
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+# Presumably the SnpEff database name matching snpeffdatadir -- confirm.
+genomeversion_array: ["GRCh38.86"]
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+fastadirs:
+  - class: Directory
+    location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479
+  - class: Directory
+    location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479
+  - class: Directory
+    location: keep:6a9d10762c98f71467957995f43a3227+138493
+  - class: Directory
+    location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469
+  - class: Directory
+    location: keep:e7a60ec6ce207229bb5dd628882b7145+138497
+  - class: Directory
+    location: keep:01d219030897f7e1550c5aa5d4df7a35+137556
+  - class: Directory
+    location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959
+  - class: Directory
+    location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107
+  - class: Directory
+    location: keep:64ca59668a2dcf61f351afcf350d42e7+138471
+  - class: Directory
+    location: keep:62d89d039fad7221f16c34b722e097af+138465
+  - class: Directory
+    location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503
+  - class: Directory
+    location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459
+  - class: Directory
+    location: keep:0eaa03f63440c825bc722022eac85448+138463
+  - class: Directory
+    location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457
+  - class: Directory
+    location: keep:cbf21c73503b4cb792b5231c46358ba3+138523
+  - class: Directory
+    location: keep:11683cd5f820ff9d42d3888bbb00221f+138205
+  - class: Directory
+    location: keep:8b09dae12292f375aa98de6f36be53c2+138491
+  - class: Directory
+    location: keep:8d02abcdef3e1b3ab910303df4331399+137649
+  - class: Directory
+    location: keep:c3884ba793bf5771ba7267246f747893+138479
+  - class: Directory
+    location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479
+  - class: Directory
+    location: keep:fa72acb0219d6833f90beb1e3874e485+138481
+  - class: Directory
+    location: keep:01e5edc00295926a91351e8069cb144f+138477
+  - class: Directory
+    location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048
+  - class: Directory
+    location: keep:66d688c08060cd9750fb6e46cce3fa39+138479
+  - class: Directory
+    location: keep:9c78e31f255406bee37d8960ce79c185+138471
+  - class: Directory
+    location: keep:100591a332d9f256842f086fa92177dc+138735
+  - class: Directory
+    location: keep:ef10e9c7591c5466d5791666f830bf06+138725
+  - class: Directory
+    location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687
+  - class: Directory
+    location: keep:ffa0d2888d14b2940562801bc163d4b2+138667
+  - class: Directory
+    location: keep:98854a1e4db58922e5e712507217e2b5+138483
+  - class: Directory
+    location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479
+  - class: Directory
+    location: keep:381b930fc4059b819bda2c414927deaf+138477
+  - class: Directory
+    location: keep:558b093245095d1029f4cbe1f2726e99+138056
+  - class: Directory
+    location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479
+  - class: Directory
+    location: keep:95aba9cd1cce2de6fbc6c5851c81387d+138479
+  - class: Directory
+    location: keep:94328ab7b661267810f9f0197eae70da+138479
+  - class: Directory
+    location: keep:666124ae8a9a6734bba5de48490748c0+138479
+  - class: Directory
+    location: keep:98359aa9baa931eea204298f7b26563c+138479
+  - class: Directory
+    location: keep:c061711a7b7e7d8acddfde36e785cc77+138479
+  - class: Directory
+    location: keep:ac21ed8a0e6af91debc39ddee1197787+138503
+  - class: Directory
+    location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357
+  - class: Directory
+    location: keep:0e6d1ff3738abd240efaca9079f62f46+138851
+  - class: Directory
+    location: keep:ed82b93d0ca76ffd666457399c2462cd+138101
+  - class: Directory
+    location: keep:7efb683d9556c87c69fb1cbbaa290820+137674
+  - class: Directory
+    location: keep:377b8665f04091581e560614ba20ca46+138515
+  - class: Directory
+    location: keep:da7207cc977b12f60372b742785eec62+138491
+  - class: Directory
+    location: keep:54403688627695480b373eb354dc1c7f+138479
+  - class: Directory
+    location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489
+  - class: Directory
+    location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093
+  - class: Directory
+    location: keep:661cd8f54deabacb49ec78db93ed4578+138479
+  - class: Directory
+    location: keep:b326e6b7773354041b03dfe4564ffd0b+137095
+  - class: Directory
+    location: keep:853632bf7879231f9ac24096e52f95e1+138479
+  - class: Directory
+    location: keep:f190f2167bcd4fe819878efcde43e6f0+138059
+  - class: Directory
+    location: keep:51f70f5861f61aca2e441e2c5d282150+138479
+  - class: Directory
+    location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479
+  - class: Directory
+    location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479
+  - class: Directory
+    location: keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477
+  - class: Directory
+    location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485
+  - class: Directory
+    location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580
+  - class: Directory
+    location: keep:f0322067b85babcae2ecdd19d34f7597+138478
+  - class: Directory
+    location: keep:47aa083e8d0a6c4c297d036165902282+138478
+  - class: Directory
+    location: keep:9f9d01fd058a06f30955f6b9709194fb+138531
+  - class: Directory
+    location: keep:f382e81786847d7395684e858786fc62+138479
+  - class: Directory
+    location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479
+  - class: Directory
+    location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059
+  - class: Directory
+    location: keep:6346d82f0746faee45f5ba194dd25d0b+138479
+  - class: Directory
+    location: keep:046da89ef991cc019f5092c326e2b8f8+138479
+  - class: Directory
+    location: keep:52bdef8f26c69eb338d5d94b08cce125+138479
+  - class: Directory
+    location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599
+  - class: Directory
+    location: keep:fa21ef0cd50701250a927b4993df53e7+138297
+  - class: Directory
+    location: keep:a333d969b2b84dfe7a2abb5889763c26+138675
+  - class: Directory
+    location: keep:5fcb253d3584035246f5b4bcd60fa348+137291
+  - class: Directory
+    location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209
+  - class: Directory
+    location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402
+  - class: Directory
+    location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783
+  - class: Directory
+    location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879
+  - class: Directory
+    location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187
+  - class: Directory
+    location: keep:55ad863fef7d8dccf4db32f282f752c4+135119
+  - class: Directory
+    location: keep:30e016d511a2443985d2b051638a419b+138047
+  - class: Directory
+    location: keep:2711053e417e15034cae0c8aec568a45+138352
+  - class: Directory
+    location: keep:020cddfa8082c5476c011aaa189cf518+138479
+  - class: Directory
+    location: keep:5bb2df76b0869bea8833784a934f7021+138479
+  - class: Directory
+    location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479
+  - class: Directory
+    location: keep:6e1bf98f088ced1ef8b60f562374325b+138479
+  - class: Directory
+    location: keep:153d189544f36dd39610087a782589ca+138479
+  - class: Directory
+    location: keep:ff71aa46f8f4134f1556d4b19d0b9307+138058
+  - class: Directory
+    location: keep:e996083281665242208b0773b0d6287c+138058
+  - class: Directory
+    location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479
+  - class: Directory
+    location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479
+  - class: Directory
+    location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479
+  - class: Directory
+    location: keep:e08ebbc428bc0604e07f72b21410ce58+138479
+  - class: Directory
+    location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479
+  - class: Directory
+    location: keep:f4318c61400198c3d44895e184e79b4c+138479
+  - class: Directory
+    location: keep:5a0d263b7469871268e31919899e6103+138445
+  - class: Directory
+    location: keep:0c5a32dc706c195619b3f84bac5d3978+136353
+  - class: Directory
+    location: keep:3843ae092703f6696cb5f42f464f4583+134452
+  - class: Directory
+    location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783
+  - class: Directory
+    location: keep:570f09f532cbd74146bd8ba3cc363263+137749
+  - class: Directory
+    location: keep:ec659571f3a78e63e457797fc58f1828+137677
+  - class: Directory
+    location: keep:60da5cf26a882d1b4e024326cd7d893b+130004
+  - class: Directory
+    location: keep:13f6862e9770f63bd44b6ef539541dc3+138079
+  - class: Directory
+    location: keep:83fb84b000e58c16fe17adef5de277b1+138079
+  - class: Directory
+    location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079
+  - class: Directory
+    location: keep:fc49628c14a44abf3e54c4a956aa5888+136845
+  - class: Directory
+    location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089
+  - class: Directory
+    location: keep:c08e22c6705b07111205c0edc2c3872d+136586
+  - class: Directory
+    location: keep:c577425ff9f3a0d886f3426065a72ca4+137691
+  - class: Directory
+    location: keep:ddf235932a0e2176593aac5a4ac42861+136337
+  - class: Directory
+    location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731
+  - class: Directory
+    location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879
+  - class: Directory
+    location: keep:838fd46abf759e68bdc17f29d1680752+138063
+  - class: Directory
+    location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665
+  - class: Directory
+    location: keep:3dfbeda6c564f9f275f74c694022b52d+135167
+  - class: Directory
+    location: keep:94de36359231b01ef2e277083bfe5287+137677
+  - class: Directory
+    location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845
+  - class: Directory
+    location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170
+  - class: Directory
+    location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678
+  - class: Directory
+    location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679
+  - class: Directory
+    location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665
+  - class: Directory
+    location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362
+  - class: Directory
+    location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859
+  - class: Directory
+    location: keep:9acd969c8b08cedce1fd8d769d214b75+137559
+  - class: Directory
+    location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977
+  - class: Directory
+    location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161
+  - class: Directory
+    location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569
+  - class: Directory
+    location: keep:dfed457efee3414f1c1f7536fe83eed3+109552
+  - class: Directory
+    location: keep:14388f62c868254149d5972c7d80d28d+97366
+  - class: Directory
+    location: keep:5e99d1ef6b90001e01450ffe093e9493+135278
+  - class: Directory
+    location: keep:153533e237092985535d7f9b0a4a354e+135278
+  - class: Directory
+    location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278
+  - class: Directory
+    location: keep:281ec2620a88b7274b44ece0ca96b543+135278
+  - class: Directory
+    location: keep:9d2dfc3510b82e5249045e71862a44d9+135278
+  - class: Directory
+    location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194
+  - class: Directory
+    location: keep:d80d9356d64396465ca61585b4c3031b+135419
+  - class: Directory
+    location: keep:881e67efac709867dafc480116c2edf1+138479
+  - class: Directory
+    location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479
+  - class: Directory
+    location: keep:6a671736ca4bf130e3ab507b702024bc+136672
+  - class: Directory
+    location: keep:504c77a1298ff2649af35527dec8b467+138058
+  - class: Directory
+    location: keep:e19f2c238e685f62f52c58eb4c44d703+134867
+  - class: Directory
+    location: keep:afad6e6fd6933d8919a4a39712e80afe+67509
+  - class: Directory
+    location: keep:09771439ce1431d168b15bb36cc8cacb+136998
+  - class: Directory
+    location: keep:cb3b59ec7468d27ef643380568f945e4+138679
+  - class: Directory
+    location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679
+  - class: Directory
+    location: keep:52d413d47ebc5abaeda098254ce6d517+138629
+  - class: Directory
+    location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059
+  - class: Directory
+    location: keep:d081f62909038dd25c499972547ced53+138479
+  - class: Directory
+    location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975
+  - class: Directory
+    location: keep:9882f01b1cc6172959a3efab49d89397+78041
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml
new file mode 100644 (file)
index 0000000..452681c
--- /dev/null
@@ -0,0 +1,59 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input values; presumably for the fasta2numpy-multi-wf workflow, going
+# by this file's name -- confirm against the workflow definition.
+# Each *_array parameter holds exactly one element in this file.
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 12
+# Empty string; presumably means no genome-name filter is applied -- confirm.
+matchgenome_array: [""]
+# One inner list whose only entry is null.
+regions_nestedarray:
+  - - null
+threads_array: [10]
+# NOTE: this is the quoted string "false", not a YAML boolean.
+mergeoutput_array: ["false"]
+expandregions_array: [0]
+# NOTE(review): unlike the other locations in this file (hash+size form), this
+# one looks like a collection UUID -- confirm that is intended.
+phenotypesdir:
+  class: Directory
+  location: keep:2xpu4-4zz18-yq0njnojx7kaj34
+# Three strings; they look like a date, a free-text description and a project URL.
+readmeinfo: ["2022-03-22", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+# chr1 through chr22, then chrX and chrY.
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+# Presumably the SnpEff database name matching snpeffdatadir -- confirm.
+genomeversion_array: ["GRCh38.86"]
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+# A single input Directory in this file.
+fastadirs:
+  - class: Directory
+    location: keep:22524bac46f1363efaadea1d845f8c90+1442
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-public.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-public.yml
new file mode 100644 (file)
index 0000000..2c03b43
--- /dev/null
@@ -0,0 +1,327 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 12
+matchgenome_array: [""]
+regions_nestedarray:
+  - - null
+threads_array: [10]
+mergeoutput_array: ["false"]
+expandregions_array: [0]
+phenotypesdir:
+  class: Directory
+  location: keep:2xpu4-4zz18-yq0njnojx7kaj34
+readmeinfo: ["2022-08-27", "public data", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion_array: ["GRCh38.86"]
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+fastadirs:
+  - class: Directory
+    location: keep:f70e65326f36502319b520e0abe532a4+86845
+  - class: Directory
+    location: keep:2ae6287169aab85ce3d4b62565aef7b7+86852
+  - class: Directory
+    location: keep:5ba1f0e9a8b99768414d50020ce171d1+86860
+  - class: Directory
+    location: keep:ec4b7b56b8f40bcdd106e0a9b343eb0b+86858
+  - class: Directory
+    location: keep:354e180b1dcca07e1afc798ac6e46170+86854
+  - class: Directory
+    location: keep:da769c6fd883225ed10320e8d0a8891c+20975
+  - class: Directory
+    location: keep:6993bbbb53913aab66a11c004fad3817+21088
+  - class: Directory
+    location: keep:c586e7a0b300ab3c59ec622b6b4eb43d+21087
+  - class: Directory
+    location: keep:6f9199c7e66c0aa72b570855500ba8bd+20984
+  - class: Directory
+    location: keep:bb22967697aac47adc7b4139bcc90311+20970
+  - class: Directory
+    location: keep:d8b9d7a259e6674c9dcb3c410b8aba93+86847
+  - class: Directory
+    location: keep:c0c62e1429a60268728fe980b06d1cd4+78425
+  - class: Directory
+    location: keep:392860853b0525b9af785ade7553ec1a+86859
+  - class: Directory
+    location: keep:348d9ad2b199ac1969776bc824bd5678+86850
+  - class: Directory
+    location: keep:8524d2e65adaf66619385f4ce5b7d12c+86856
+  - class: Directory
+    location: keep:fed854d8ad8d272d80f0f1727526c0b0+86850
+  - class: Directory
+    location: keep:fd9672a8eed704079acb034aa3a9ff39+86851
+  - class: Directory
+    location: keep:12976e792efa5b41bae0d1747dcfae9f+86838
+  - class: Directory
+    location: keep:d479e7f2068dfb99bc77a5fb89974aa6+86843
+  - class: Directory
+    location: keep:c6df9ee5438b0c0375dcc36596a55a08+21037
+  - class: Directory
+    location: keep:a8ef8420e1349d35bd8c561897c69d07+21037
+  - class: Directory
+    location: keep:ae21ed97df861b3b6d7ab06fdb791853+21037
+  - class: Directory
+    location: keep:3e7237716e8b2abbb3201d58127d7fad+21037
+  - class: Directory
+    location: keep:804223776db91718ef3950f035748aab+20923
+  - class: Directory
+    location: keep:b692a2d5743866421fef3b59b5459ae7+21037
+  - class: Directory
+    location: keep:b849f97ef209feacae0a9e2b01f76256+21037
+  - class: Directory
+    location: keep:8a0d511e5386be90200c7f1a41494c6b+21511
+  - class: Directory
+    location: keep:47a99ccd72dfcee0d71686605b0aa30d+21037
+  - class: Directory
+    location: keep:b91d899117aedad5e83723046bcb85cd+21037
+  - class: Directory
+    location: keep:56c00d8b4eba69b6232135fa35362e95+21037
+  - class: Directory
+    location: keep:b1fa3eb3e3ea592172cf2dd0400713ec+21037
+  - class: Directory
+    location: keep:9d26ff7a6725f4295ed2b8e54dfc0a9e+21037
+  - class: Directory
+    location: keep:9a64e0e79c49d8d564d5268c830ac7a4+86837
+  - class: Directory
+    location: keep:388c45e48909fac3e3341d421497c882+20872
+  - class: Directory
+    location: keep:c33f40e0fff14cebfdacfecb932a2d16+21037
+  - class: Directory
+    location: keep:d280663b4ffcf7fc9cddcbd3ebbc0ecc+21037
+  - class: Directory
+    location: keep:4db992a5512733f46a4c26d8a1ae47be+21440
+  - class: Directory
+    location: keep:37f6bfa92ce9dd08e34d62edb6051603+86838
+  - class: Directory
+    location: keep:84a070c697ceec31000bf24de2cc65ba+21037
+  - class: Directory
+    location: keep:b8bcb829ee3a883bc5f4ed7e35a5d127+21037
+  - class: Directory
+    location: keep:b6c079b2a6dec889c79f4072737f8ba1+20164
+  - class: Directory
+    location: keep:713f9b8dd6ee5c1d5c82dacbd0ce8e65+21037
+  - class: Directory
+    location: keep:55dbfb376eef1a2d6547d85b3604ae96+21037
+  - class: Directory
+    location: keep:f796cb3f2c9f96b94f707ad3aa42a57d+21037
+  - class: Directory
+    location: keep:e65dea3f9460d946abdfdd323c302e9f+21037
+  - class: Directory
+    location: keep:ec7268728cc5f293f34301044306acd2+20985
+  - class: Directory
+    location: keep:fdf43161f94ef9171a8c1963e92b47e3+21037
+  - class: Directory
+    location: keep:5f1fecbc66525f0924ced98bcae5c0ca+21523
+  - class: Directory
+    location: keep:66e5991fa0f6c39648c4c2fc33114ddf+21037
+  - class: Directory
+    location: keep:c57f6713ae289b6534996aceb032bb42+20586
+  - class: Directory
+    location: keep:8a49b79303015e7381b25991b18df204+21037
+  - class: Directory
+    location: keep:9a65a4d32f1d948a08212dd1b6e73512+21037
+  - class: Directory
+    location: keep:9fcfdf94365cbff9bccc2f7ed75d951e+21037
+  - class: Directory
+    location: keep:45d0e870bd2d26d5b34b1aa65d8e48b5+21037
+  - class: Directory
+    location: keep:6a02e00dcfe7dda5ce7a68b0cd547d4b+21037
+  - class: Directory
+    location: keep:e887895e2d866e4ce98a56ae8edfda86+20978
+  - class: Directory
+    location: keep:f09b0c7feb0f70c76f28471c9b45bdfc+21513
+  - class: Directory
+    location: keep:578d8a6e623d7b9c36c85aabc08fc11d+21037
+  - class: Directory
+    location: keep:4937ca42877ff56d32f0155d03c72721+20421
+  - class: Directory
+    location: keep:43779f0116bf633ca757c029bc72c283+7877
+  - class: Directory
+    location: keep:a9a04ce62720f93c97ac292f3a0fc565+21037
+  - class: Directory
+    location: keep:ae042538726c56c73158ec979a3aab78+17520
+  - class: Directory
+    location: keep:132fd51727f13ffc5d146e2b5879bc0a+21037
+  - class: Directory
+    location: keep:b70d5cd30bec2518977dd0f4264c31c6+21037
+  - class: Directory
+    location: keep:9d65f08e988b2e1099a7484fbded50f0+21037
+  - class: Directory
+    location: keep:f4f743252457b49968b36b87e2d892de+21037
+  - class: Directory
+    location: keep:b9846a7d4328a5d5ae489ddf2fdcbd77+20923
+  - class: Directory
+    location: keep:00c9233f5bbd92975019dfa9d7b85d95+21037
+  - class: Directory
+    location: keep:c67daae4ad6e6dbb723da66b06852b40+21519
+  - class: Directory
+    location: keep:14a37d50a3be9facfa187e88d5d2181e+21037
+  - class: Directory
+    location: keep:76db97f00e45b2d1bcd802a27c12cc8f+21079
+  - class: Directory
+    location: keep:82e07421ee8c32d40f23a4563f8fb75e+21442
+  - class: Directory
+    location: keep:35a7ea114db1b31d210a482f314faaa0+21037
+  - class: Directory
+    location: keep:67438d55eae3831633b8203732bb16e9+21037
+  - class: Directory
+    location: keep:d1e690b74aa49248719c58c4d0ba79b3+21037
+  - class: Directory
+    location: keep:551267f171357ef3fbbdde531ded3f2c+21037
+  - class: Directory
+    location: keep:c7b5f19064c6565020cd3fba15d8afb4+20927
+  - class: Directory
+    location: keep:a95ca99d6bc784b0e8fad8c80e526b5f+21037
+  - class: Directory
+    location: keep:d64d3dd3f11f1611efa23baa4994e87a+21037
+  - class: Directory
+    location: keep:7df93e28b40be1740f6c4ca510b2970f+21443
+  - class: Directory
+    location: keep:5e1dad08f1139addc7e1c21b69a5c2a8+21037
+  - class: Directory
+    location: keep:3206c2a7abd99a59e86bab81fb4e3754+21518
+  - class: Directory
+    location: keep:41bb1ba630a91f1c599a4890380caf80+86846
+  - class: Directory
+    location: keep:9614163f41df1585913d71526592004b+20757
+  - class: Directory
+    location: keep:db85f1601caadbdff8119d9e35d19cdb+86851
+  - class: Directory
+    location: keep:756e93603f4c4e6a5c8d36c48b191d19+21037
+  - class: Directory
+    location: keep:9cb7b46a661eda4b9255714a31eaee1c+21037
+  - class: Directory
+    location: keep:fa0402547d3a2ef224c8cec0a730ce32+21037
+  - class: Directory
+    location: keep:2e21c7cfd65354721f78606c987e449c+21037
+  - class: Directory
+    location: keep:a5296a9562fba84c7fefc57892addb2f+20978
+  - class: Directory
+    location: keep:1a17c7abc302e83a6bdda1dfc0f11f61+21037
+  - class: Directory
+    location: keep:64f07308131a03f5b93430a18a248dba+21528
+  - class: Directory
+    location: keep:7be8cdbe73c0717629a2c58abfb1a0a2+21037
+  - class: Directory
+    location: keep:9daea4925475221439048da5c97b80f9+21037
+  - class: Directory
+    location: keep:8a107d64077d26e39d3cd0900ddebb4e+21444
+  - class: Directory
+    location: keep:cee972e2fb687639320459497292eccf+20756
+  - class: Directory
+    location: keep:722874b6ddd947bbf34d11abdb4271bf+21037
+  - class: Directory
+    location: keep:d1b77ec53bdc9d9ac0ec0decd897e0fc+21037
+  - class: Directory
+    location: keep:f99b35068e9972e7bf893f01585c3231+21037
+  - class: Directory
+    location: keep:596a0a3a2b58ad354b9facd67c4f93c8+21037
+  - class: Directory
+    location: keep:56085a1717dbaa43feff2f1ed2d6ae7d+21436
+  - class: Directory
+    location: keep:0983df98a1d602a4434cbe327921ee42+21037
+  - class: Directory
+    location: keep:26b43654a8d419d40b197bd06ab438d9+21037
+  - class: Directory
+    location: keep:605b33acec8750852b8201e0ae98cf94+20923
+  - class: Directory
+    location: keep:8ee6e30a40156e4692328a56dcc06327+21521
+  - class: Directory
+    location: keep:a3a7bb8b4c99611fc152dbf321ee0a5b+21037
+  - class: Directory
+    location: keep:21ef9096b769c5ae945a24b7eaab7c52+5418
+  - class: Directory
+    location: keep:bb9f328605f0f597cecd4992c4cc91ea+21037
+  - class: Directory
+    location: keep:1fbd348efbfdcff58423cc808520490a+20787
+  - class: Directory
+    location: keep:ad6b5b7935f86ce26995b6b8b127e04b+21037
+  - class: Directory
+    location: keep:41f10a76531d35d29bdf45c561d19678+21037
+  - class: Directory
+    location: keep:80961e5505ecb0079c4477ed0001b21f+21037
+  - class: Directory
+    location: keep:1aaf3b528c72d94732c1cf18e047bca9+20922
+  - class: Directory
+    location: keep:7e338ffddacf4e247e03a4836856e585+21519
+  - class: Directory
+    location: keep:5860af9f92b6759227c4ab8a0bf8c463+21037
+  - class: Directory
+    location: keep:f87e293e0771107c2ed0231cef1dc331+21037
+  - class: Directory
+    location: keep:f8b88a16a2521531b8bb2a5de8043f68+21533
+  - class: Directory
+    location: keep:07d825d8cc3ee5e60aa365360e1052e5+21037
+  - class: Directory
+    location: keep:ac2cf9dbf715d2f3d7c56bf558c94214+21037
+  - class: Directory
+    location: keep:7709b885a502ce29a28384550a7ab40a+21037
+  - class: Directory
+    location: keep:39203af39d34284eee108dc966ee46f4+20978
+  - class: Directory
+    location: keep:db75f38419f25ad2ecb46c86e3f17d72+21037
+  - class: Directory
+    location: keep:3f75ed86079ce6f9c502609896adb7c1+21037
+  - class: Directory
+    location: keep:7288a896d67f4e8151b2ce66743da086+21523
+  - class: Directory
+    location: keep:10b80de1f92992b056e7068d81321b3b+21037
+  - class: Directory
+    location: keep:465f9957778cea2b0173667ad6c2cb68+21037
+  - class: Directory
+    location: keep:8bad3d6a943df3c7e9d37422eabf78d0+20979
+  - class: Directory
+    location: keep:7013b729faee9bcc20ef702568869fd9+21037
+  - class: Directory
+    location: keep:6581830f52d747d04fa6bba5a13fbf0e+21519
+  - class: Directory
+    location: keep:b7b0ad7d8c9f9953183e151928a7c9c0+21467
+  - class: Directory
+    location: keep:af2a0fbfc4c3176fc1f7f5cf218a90de+21521
+  - class: Directory
+    location: keep:209a1c955c53c10d53384f75c6d252e8+21444
+  - class: Directory
+    location: keep:5311e8a7770129c08dd16a41ed94c5eb+21441
+  - class: Directory
+    location: keep:aa08b00b2270276ab21a7923e7585050+21449
+  - class: Directory
+    location: keep:e0ac7e2ab8c6d35e3d8a2603c9aa37a5+21452
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-test.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-test.yml
new file mode 100644 (file)
index 0000000..2d6e0aa
--- /dev/null
@@ -0,0 +1,41 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:3cf4b6ed2bf8cd3abc27cb5a79641a86+755
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 3
+fastadirs:
+  - class: Directory
+    location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv
+  - class: Directory
+    location: keep:2xpu4-4zz18-bwlg17dkckptebn
+  - class: Directory
+    location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb
+  - class: Directory
+    location: keep:2xpu4-4zz18-i4gi8cidwnaemt3
+  - class: Directory
+    location: keep:2xpu4-4zz18-1vwxurck9m902n0
+  - class: Directory
+    location: keep:2xpu4-4zz18-yge3zutgmgecgtl
+matchgenome_array: ["ACT", "ACT|ADC", ""]
+regions_nestedarray:
+  - - class: File
+      location: keep:bc2a62baf6698c2d58e6224f851884b8+224/gencode.v37lift37.annotation.gff3.gz
+    - class: File
+      location: keep:0b74ce016766e93f7b7292fe28ea8e25+70/PMC3896259.gff3.gz
+    - null
+  - - class: File
+      location: keep:bc2a62baf6698c2d58e6224f851884b8+224/gencode.v37.annotation.gff3.gz
+    - class: File
+      location: keep:dd5ab297413d698a39541aeadfa0f26f+70/PMC3896259.gff3.gz
+    - null
+threads_array: [80, 10, 10]
+mergeoutput_array: ["true", "true", "false"]
+expandregions_array: [1000000, 1000000, 0]
diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml
new file mode 100644 (file)
index 0000000..ce6b406
--- /dev/null
@@ -0,0 +1,68 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdirs:
+  - class: Directory
+    location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 3
+fastadirs:
+  - class: Directory
+    location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv
+  - class: Directory
+    location: keep:2xpu4-4zz18-bwlg17dkckptebn
+  - class: Directory
+    location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb
+  - class: Directory
+    location: keep:2xpu4-4zz18-i4gi8cidwnaemt3
+  - class: Directory
+    location: keep:2xpu4-4zz18-1vwxurck9m902n0
+  - class: Directory
+    location: keep:2xpu4-4zz18-yge3zutgmgecgtl
+matchgenome_array: [""]
+regions_nestedarray:
+  - - null
+threads_array: [10]
+mergeoutput_array: ["false"]
+expandregions_array: [0]
+phenotypesdir:
+  class: Directory
+  location: keep:2xpu4-4zz18-yq0njnojx7kaj34
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion_array: ["GRCh38.86"]
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
diff --git a/cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml b/cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml
new file mode 100644 (file)
index 0000000..a71027a
--- /dev/null
@@ -0,0 +1,357 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdir:
+  class: Directory
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 12
+matchgenome: ""
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesnofamilydir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
+phenotypesdir:
+  class: Directory
+  location: keep:8508667def6057f0bbf0ab4f751d8b05+205
+trainingsetsize: 0.8
+randomseed: 0
+pcacomponents: 10
+readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+fastadirs:
+  - class: Directory
+    location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479
+  - class: Directory
+    location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479
+  - class: Directory
+    location: keep:6a9d10762c98f71467957995f43a3227+138493
+  - class: Directory
+    location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469
+  - class: Directory
+    location: keep:e7a60ec6ce207229bb5dd628882b7145+138497
+  - class: Directory
+    location: keep:01d219030897f7e1550c5aa5d4df7a35+137556
+  - class: Directory
+    location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959
+  - class: Directory
+    location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107
+  - class: Directory
+    location: keep:64ca59668a2dcf61f351afcf350d42e7+138471
+  - class: Directory
+    location: keep:62d89d039fad7221f16c34b722e097af+138465
+  - class: Directory
+    location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503
+  - class: Directory
+    location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459
+  - class: Directory
+    location: keep:0eaa03f63440c825bc722022eac85448+138463
+  - class: Directory
+    location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457
+  - class: Directory
+    location: keep:cbf21c73503b4cb792b5231c46358ba3+138523
+  - class: Directory
+    location: keep:11683cd5f820ff9d42d3888bbb00221f+138205
+  - class: Directory
+    location: keep:8b09dae12292f375aa98de6f36be53c2+138491
+  - class: Directory
+    location: keep:8d02abcdef3e1b3ab910303df4331399+137649
+  - class: Directory
+    location: keep:c3884ba793bf5771ba7267246f747893+138479
+  - class: Directory
+    location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479
+  - class: Directory
+    location: keep:fa72acb0219d6833f90beb1e3874e485+138481
+  - class: Directory
+    location: keep:01e5edc00295926a91351e8069cb144f+138477
+  - class: Directory
+    location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048
+  - class: Directory
+    location: keep:66d688c08060cd9750fb6e46cce3fa39+138479
+  - class: Directory
+    location: keep:9c78e31f255406bee37d8960ce79c185+138471
+  - class: Directory
+    location: keep:100591a332d9f256842f086fa92177dc+138735
+  - class: Directory
+    location: keep:ef10e9c7591c5466d5791666f830bf06+138725
+  - class: Directory
+    location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687
+  - class: Directory
+    location: keep:ffa0d2888d14b2940562801bc163d4b2+138667
+  - class: Directory
+    location: keep:98854a1e4db58922e5e712507217e2b5+138483
+  - class: Directory
+    location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479
+  - class: Directory
+    location: keep:381b930fc4059b819bda2c414927deaf+138477
+  - class: Directory
+    location: keep:558b093245095d1029f4cbe1f2726e99+138056
+  - class: Directory
+    location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479
+  - class: Directory
+    location: keep:95aba9cd1cce2de6fbc6c5851c81387d+138479
+  - class: Directory
+    location: keep:94328ab7b661267810f9f0197eae70da+138479
+  - class: Directory
+    location: keep:666124ae8a9a6734bba5de48490748c0+138479
+  - class: Directory
+    location: keep:98359aa9baa931eea204298f7b26563c+138479
+  - class: Directory
+    location: keep:c061711a7b7e7d8acddfde36e785cc77+138479
+  - class: Directory
+    location: keep:ac21ed8a0e6af91debc39ddee1197787+138503
+  - class: Directory
+    location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357
+  - class: Directory
+    location: keep:0e6d1ff3738abd240efaca9079f62f46+138851
+  - class: Directory
+    location: keep:ed82b93d0ca76ffd666457399c2462cd+138101
+  - class: Directory
+    location: keep:7efb683d9556c87c69fb1cbbaa290820+137674
+  - class: Directory
+    location: keep:377b8665f04091581e560614ba20ca46+138515
+  - class: Directory
+    location: keep:da7207cc977b12f60372b742785eec62+138491
+  - class: Directory
+    location: keep:54403688627695480b373eb354dc1c7f+138479
+  - class: Directory
+    location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489
+  - class: Directory
+    location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093
+  - class: Directory
+    location: keep:661cd8f54deabacb49ec78db93ed4578+138479
+  - class: Directory
+    location: keep:b326e6b7773354041b03dfe4564ffd0b+137095
+  - class: Directory
+    location: keep:853632bf7879231f9ac24096e52f95e1+138479
+  - class: Directory
+    location: keep:f190f2167bcd4fe819878efcde43e6f0+138059
+  - class: Directory
+    location: keep:51f70f5861f61aca2e441e2c5d282150+138479
+  - class: Directory
+    location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479
+  - class: Directory
+    location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479
+  - class: Directory
+    location: keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477
+  - class: Directory
+    location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485
+  - class: Directory
+    location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580
+  - class: Directory
+    location: keep:f0322067b85babcae2ecdd19d34f7597+138478
+  - class: Directory
+    location: keep:47aa083e8d0a6c4c297d036165902282+138478
+  - class: Directory
+    location: keep:9f9d01fd058a06f30955f6b9709194fb+138531
+  - class: Directory
+    location: keep:f382e81786847d7395684e858786fc62+138479
+  - class: Directory
+    location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479
+  - class: Directory
+    location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059
+  - class: Directory
+    location: keep:6346d82f0746faee45f5ba194dd25d0b+138479
+  - class: Directory
+    location: keep:046da89ef991cc019f5092c326e2b8f8+138479
+  - class: Directory
+    location: keep:52bdef8f26c69eb338d5d94b08cce125+138479
+  - class: Directory
+    location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599
+  - class: Directory
+    location: keep:fa21ef0cd50701250a927b4993df53e7+138297
+  - class: Directory
+    location: keep:a333d969b2b84dfe7a2abb5889763c26+138675
+  - class: Directory
+    location: keep:5fcb253d3584035246f5b4bcd60fa348+137291
+  - class: Directory
+    location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209
+  - class: Directory
+    location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402
+  - class: Directory
+    location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783
+  - class: Directory
+    location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879
+  - class: Directory
+    location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187
+  - class: Directory
+    location: keep:55ad863fef7d8dccf4db32f282f752c4+135119
+  - class: Directory
+    location: keep:30e016d511a2443985d2b051638a419b+138047
+  - class: Directory
+    location: keep:2711053e417e15034cae0c8aec568a45+138352
+  - class: Directory
+    location: keep:020cddfa8082c5476c011aaa189cf518+138479
+  - class: Directory
+    location: keep:5bb2df76b0869bea8833784a934f7021+138479
+  - class: Directory
+    location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479
+  - class: Directory
+    location: keep:6e1bf98f088ced1ef8b60f562374325b+138479
+  - class: Directory
+    location: keep:153d189544f36dd39610087a782589ca+138479
+  - class: Directory
+    location: keep:ff71aa46f8f4134f1556d4b19d0b9307+138058
+  - class: Directory
+    location: keep:e996083281665242208b0773b0d6287c+138058
+  - class: Directory
+    location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479
+  - class: Directory
+    location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479
+  - class: Directory
+    location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479
+  - class: Directory
+    location: keep:e08ebbc428bc0604e07f72b21410ce58+138479
+  - class: Directory
+    location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479
+  - class: Directory
+    location: keep:f4318c61400198c3d44895e184e79b4c+138479
+  - class: Directory
+    location: keep:5a0d263b7469871268e31919899e6103+138445
+  - class: Directory
+    location: keep:0c5a32dc706c195619b3f84bac5d3978+136353
+  - class: Directory
+    location: keep:3843ae092703f6696cb5f42f464f4583+134452
+  - class: Directory
+    location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783
+  - class: Directory
+    location: keep:570f09f532cbd74146bd8ba3cc363263+137749
+  - class: Directory
+    location: keep:ec659571f3a78e63e457797fc58f1828+137677
+  - class: Directory
+    location: keep:60da5cf26a882d1b4e024326cd7d893b+130004
+  - class: Directory
+    location: keep:13f6862e9770f63bd44b6ef539541dc3+138079
+  - class: Directory
+    location: keep:83fb84b000e58c16fe17adef5de277b1+138079
+  - class: Directory
+    location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079
+  - class: Directory
+    location: keep:fc49628c14a44abf3e54c4a956aa5888+136845
+  - class: Directory
+    location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089
+  - class: Directory
+    location: keep:c08e22c6705b07111205c0edc2c3872d+136586
+  - class: Directory
+    location: keep:c577425ff9f3a0d886f3426065a72ca4+137691
+  - class: Directory
+    location: keep:ddf235932a0e2176593aac5a4ac42861+136337
+  - class: Directory
+    location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731
+  - class: Directory
+    location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879
+  - class: Directory
+    location: keep:838fd46abf759e68bdc17f29d1680752+138063
+  - class: Directory
+    location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665
+  - class: Directory
+    location: keep:3dfbeda6c564f9f275f74c694022b52d+135167
+  - class: Directory
+    location: keep:94de36359231b01ef2e277083bfe5287+137677
+  - class: Directory
+    location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845
+  - class: Directory
+    location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170
+  - class: Directory
+    location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678
+  - class: Directory
+    location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679
+  - class: Directory
+    location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665
+  - class: Directory
+    location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362
+  - class: Directory
+    location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859
+  - class: Directory
+    location: keep:9acd969c8b08cedce1fd8d769d214b75+137559
+  - class: Directory
+    location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977
+  - class: Directory
+    location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161
+  - class: Directory
+    location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569
+  - class: Directory
+    location: keep:dfed457efee3414f1c1f7536fe83eed3+109552
+  - class: Directory
+    location: keep:14388f62c868254149d5972c7d80d28d+97366
+  - class: Directory
+    location: keep:5e99d1ef6b90001e01450ffe093e9493+135278
+  - class: Directory
+    location: keep:153533e237092985535d7f9b0a4a354e+135278
+  - class: Directory
+    location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278
+  - class: Directory
+    location: keep:281ec2620a88b7274b44ece0ca96b543+135278
+  - class: Directory
+    location: keep:9d2dfc3510b82e5249045e71862a44d9+135278
+  - class: Directory
+    location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194
+  - class: Directory
+    location: keep:d80d9356d64396465ca61585b4c3031b+135419
+  - class: Directory
+    location: keep:881e67efac709867dafc480116c2edf1+138479
+  - class: Directory
+    location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479
+  - class: Directory
+    location: keep:6a671736ca4bf130e3ab507b702024bc+136672
+  - class: Directory
+    location: keep:504c77a1298ff2649af35527dec8b467+138058
+  - class: Directory
+    location: keep:e19f2c238e685f62f52c58eb4c44d703+134867
+  - class: Directory
+    location: keep:afad6e6fd6933d8919a4a39712e80afe+67509
+  - class: Directory
+    location: keep:09771439ce1431d168b15bb36cc8cacb+136998
+  - class: Directory
+    location: keep:cb3b59ec7468d27ef643380568f945e4+138679
+  - class: Directory
+    location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679
+  - class: Directory
+    location: keep:52d413d47ebc5abaeda098254ce6d517+138629
+  - class: Directory
+    location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059
+  - class: Directory
+    location: keep:d081f62909038dd25c499972547ced53+138479
+  - class: Directory
+    location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975
+  - class: Directory
+    location: keep:9882f01b1cc6172959a3efab49d89397+78041
diff --git a/cwl/lightning/yml/fasta2numpy-wf-100test.yml b/cwl/lightning/yml/fasta2numpy-wf-100test.yml
new file mode 100644 (file)
index 0000000..0981448
--- /dev/null
@@ -0,0 +1,63 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdir:
+  class: Directory
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 1
+matchgenome: ""
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesnofamilydir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
+phenotypesdir:
+  class: Directory
+  location: keep:8508667def6057f0bbf0ab4f751d8b05+205
+trainingsetsize: 0.8
+randomseed: 0
+pcacomponents: 10
+readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"]
+chrs:
+  - "chr1"
+  - "chr2"
+  - "chr3"
+  - "chr4"
+  - "chr5"
+  - "chr6"
+  - "chr7"
+  - "chr8"
+  - "chr9"
+  - "chr10"
+  - "chr11"
+  - "chr12"
+  - "chr13"
+  - "chr14"
+  - "chr15"
+  - "chr16"
+  - "chr17"
+  - "chr18"
+  - "chr19"
+  - "chr20"
+  - "chr21"
+  - "chr22"
+  - "chrX"
+  - "chrY"
+snpeffdatadir:
+  class: Directory
+  location: keep:66c966928931de252274772c76f73025+52054
+genomeversion: "GRCh38.86"
+dbsnp:
+  class: File
+  location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf
+gnomaddir:
+  class: Directory
+  location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135
+fastadirs:
+  - class: Directory
+    location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479
diff --git a/cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml b/cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml
new file mode 100644 (file)
index 0000000..a68bb4f
--- /dev/null
@@ -0,0 +1,337 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+refdir:
+  class: Directory
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
+batchsize: 1
+matchgenome: ""
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+gqcutoff: 20
+genomebed:
+  class: File
+  location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"]
+refsdir:
+  class: Directory
+  location: keep:3a1673a2230877bfaf92b50f7376529c+10590
+mapsdir:
+  class: Directory
+  location: keep:10b1baade985c576a97dfc37d12b953b+1096
+panelnocallbed:
+  class: File
+  location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed
+panelcallbed:
+  class: File
+  location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed
+sampleids:
+  - "A-IIAA-IA000604-BL-NCR-14AD67907"
+  - "A-IIAA-IA000369-BL-NCR-14AD67573"
+  - "A-IIAA-IA000196-BL-NCR-14AD66938"
+  - "A-IIAA-IA000194-BL-NCR-14AD66933"
+  - "A-IIAA-IA000009-BL-NCR-14AD67102"
+  - "A-WCAP-WC001736-BL-COL-62260BL1"
+  - "A-WCAP-WC001725-BL-COL-62376BL1"
+  - "A-WCAP-WC001708-BL-COL-50951BL1"
+  - "A-WCAP-WC001695-BL-COL-46967BL1"
+  - "A-WCAP-WC001710-BL-COL-40530BL1"
+  - "A-WCAP-WC001720-BL-COL-62328BL1"
+  - "A-WCAP-WC001704-BL-COL-48409BL1"
+  - "A-WCAP-WC001702-BL-COL-45946BL1"
+  - "A-WCAP-WC001697-BL-COL-46951BL1"
+  - "A-WCAP-WC001699-BL-COL-55120BL1"
+  - "A-WCAP-WC001700-BL-COL-57684BL1"
+  - "A-WCAP-WC001696-BL-COL-50506BL1"
+  - "A-WCAP-WC001693-BL-COL-48336BL1"
+  - "A-WCAP-WC001683-BL-COL-49188BL1"
+  - "A-WCAP-WC001687-BL-COL-48252BL1"
+  - "A-WCAP-WC001694-BL-COL-45207BL1"
+  - "A-WCAP-WC001686-BL-COL-50549BL1"
+  - "A-WCAP-WC001691-BL-COL-46031BL1"
+  - "A-WCAP-WC001688-BL-COL-50977BL1"
+  - "A-WCAP-WC001684-BL-COL-47006BL1"
+  - "A-WCAP-WC001681-BL-COL-57657BL1"
+  - "A-WCAP-WC001682-BL-COL-47560BL1"
+  - "A-WCAP-WC001679-BL-COL-49250BL1"
+  - "A-WCAP-WC001672-BL-COL-45187BL1"
+  - "A-WCAP-WC001673-BL-COL-48284BL1"
+  - "A-WCAP-WC001675-BL-COL-48314BL1"
+  - "A-WCAP-WC001677-BL-COL-46990BL1"
+  - "A-WCAP-WC001671-BL-COL-50527BL1"
+  - "A-WCAP-WC001669-BL-COL-57703BL1"
+  - "A-WCAP-WC001667-BL-COL-46970BL1"
+  - "A-WCAP-WC001670-BL-COL-50445BL1"
+  - "A-WCAP-WC001654-BL-COL-46029BL1"
+  - "A-WCAP-WC001664-BL-COL-39293BL1"
+  - "A-WCAP-WC001665-BL-COL-47583BL1"
+  - "A-WCAP-WC001653-BL-COL-48362BL1"
+  - "A-WCAP-WC001657-BL-COL-65820BL1"
+  - "A-WCAP-WC001658-BL-COL-50384BL1"
+  - "A-WCAP-WC001666-BL-COL-48390BL1"
+  - "A-WCAP-WC001656-BL-COL-57746BL1"
+  - "A-WCAP-WC001638-BL-COL-47008BL1"
+  - "A-WCAP-WC001639-BL-COL-41818BL1"
+  - "A-WCAP-WC001646-BL-COL-45962BL1"
+  - "A-WCAP-WC001652-BL-COL-47594BL1"
+  - "A-WCAP-WC001640-BL-COL-47537BL1"
+  - "A-WCAP-WC001629-BL-COL-69602BL1"
+  - "A-WCAP-WC001641-BL-COL-46986BL1"
+  - "A-WCAP-WC001645-BL-COL-41786BL1"
+  - "A-WCAP-WC001636-BL-COL-47553BL1"
+  - "A-WCAP-WC001634-BL-COL-50462BL1"
+  - "A-WCAP-WC001623-BL-COL-56498BL1"
+  - "A-WCAP-WC001627-BL-COL-56607BL1"
+  - "A-WCAP-WC001626-BL-COL-48292BL1"
+  - "A-WCAP-WC001621-BL-COL-48345BL1"
+  - "A-WCAP-WC001618-BL-COL-50400BL1"
+  - "A-WCAP-WC001622-BL-COL-50921BL1"
+  - "A-WCAP-WC001616-BL-COL-56626BL1"
+  - "A-WCAP-WC001617-BL-COL-45961BL1"
+  - "A-WCAP-WC001612-BL-COL-49158BL1"
+  - "A-WCAP-WC001608-BL-COL-48342BL1"
+  - "A-WCAP-WC001611-BL-COL-48295BL1"
+  - "A-WCAP-WC001605-BL-COL-45954BL1"
+  - "A-WCAP-WC001594-BL-COL-40429BL1"
+  - "A-WCAP-WC001606-BL-COL-48422BL1"
+  - "A-WCAP-WC001595-BL-COL-45200BL1"
+  - "A-WCAP-WC001602-BL-COL-45226BL1"
+  - "A-WCAP-WC001604-BL-COL-56480BL1"
+  - "A-WCAP-WC001598-BL-COL-49123BL1"
+  - "A-WCAP-WC001603-BL-COL-56489BL1"
+  - "A-WCAP-WC001587-BL-COL-45975BL1"
+  - "A-WCAP-WC001593-BL-COL-45249BL1"
+  - "A-WCAP-WC001589-BL-COL-55018BL1"
+  - "A-WCAP-WC001577-BL-COL-48318BL1"
+  - "A-WCAP-WC001586-BL-COL-45943BL1"
+  - "A-WCAP-WC001585-BL-COL-45991BL1"
+  - "A-WCAP-WC001584-BL-COL-45976BL1"
+  - "A-WCAP-WC001581-BL-COL-56483BL1"
+  - "A-WCAP-WC001582-BL-COL-47610BL1"
+  - "A-WCAP-WC001572-BL-COL-45937BL1"
+  - "A-WCAP-WC001574-BL-COL-56642BL1"
+  - "A-WCAP-WC001567-BL-COL-45235BL1"
+  - "A-WCAP-WC001573-BL-COL-46034BL1"
+  - "A-WCAP-WC001568-BL-COL-50455BL1"
+  - "A-WCAP-WC001566-BL-COL-56566BL1"
+  - "A-WCAP-WC001559-BL-COL-49283BL1"
+  - "A-WCAP-WC001556-BL-COL-45259BL1"
+  - "A-WCAP-WC001564-BL-COL-39158BL1"
+  - "A-WCAP-WC001552-BL-COL-55020BL1"
+  - "A-WCAP-WC001563-BL-COL-41119BL1"
+  - "A-WCAP-WC001558-BL-COL-48277BL1"
+  - "A-WCAP-WC001545-BL-COL-45981BL1"
+  - "A-WCAP-WC001547-BL-COL-57785BL1"
+  - "A-WCAP-WC001543-BL-COL-45240BL1"
+  - "A-WCAP-WC001542-BL-COL-40920BL1"
+  - "A-WCAP-WC001541-BL-COL-41762BL1"
+  - "A-WCAP-WC001544-BL-COL-49161BL1"
+splitvcfdirs:
+  - class: Directory
+    location: keep:47e5a217867e6a69efe10378541b38e7+7816
+  - class: Directory
+    location: keep:00d1dac7ab3769e1b600129643b3f7bc+8361
+  - class: Directory
+    location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128
+  - class: Directory
+    location: keep:24eb9ef3c45fae2569077d429f121bbd+8778
+  - class: Directory
+    location: keep:00a065a8a5e71acfd083172de3a86d4b+6930
+  - class: Directory
+    location: keep:c751a4f4778156180605ebc04bad6cc1+5894
+  - class: Directory
+    location: keep:7f94ff84914a9d0d873f5313e7124c55+5642
+  - class: Directory
+    location: keep:987b13f6ad4974b796cc2ec8f270d19d+6356
+  - class: Directory
+    location: keep:2284173a8cbcf3e950a41d385069d327+7622
+  - class: Directory
+    location: keep:db084c1516d23a4c6c746105d58a08f3+6356
+  - class: Directory
+    location: keep:a823e23a5f0822a981fa720a2bcb2287+6062
+  - class: Directory
+    location: keep:c7bd505077ecd399b59176f8d5bc34aa+5808
+  - class: Directory
+    location: keep:a590e7f19b8f19f1babdd8b7795e1c20+5681
+  - class: Directory
+    location: keep:1e1232a1a1df7b39605a4630464ef864+5517
+  - class: Directory
+    location: keep:f5c57ac585ab060ca4f9af439acd3e78+5682
+  - class: Directory
+    location: keep:714619e20a6cb2220fae47d5519d2b9f+5093
+  - class: Directory
+    location: keep:d572b35e03d4a2545e4c917506738917+5383
+  - class: Directory
+    location: keep:cca9ddebe18cdde474f9b9ceb33c0247+5768
+  - class: Directory
+    location: keep:55a3599a3b6adac75c19772f2fd0e080+5683
+  - class: Directory
+    location: keep:f4725fe4ae3032ff1f6852701aef182b+5176
+  - class: Directory
+    location: keep:a046387c19b4b4e92d3728cfb5c2239d+5468
+  - class: Directory
+    location: keep:ca6aabbfe01db391a27c755f413f7e24+5010
+  - class: Directory
+    location: keep:f880cb3c4fb2dee626a7afdce73f6b35+5051
+  - class: Directory
+    location: keep:213d15a3e1e2642b593449943d54f940+5851
+  - class: Directory
+    location: keep:e5df0c86fe692bdc234acdea89c09735+5512
+  - class: Directory
+    location: keep:3dcd72a55d24501eb4eeab04e735bc9e+6058
+  - class: Directory
+    location: keep:a56ad75332c6e504237d20f17006b306+5558
+  - class: Directory
+    location: keep:15fae2b96e4f4c28e9473755ade2beb0+5515
+  - class: Directory
+    location: keep:7be46744f59209dd25710bfa8bfb59d0+6527
+  - class: Directory
+    location: keep:bd0a1e4399598a231a8e78c475e94e22+5769
+  - class: Directory
+    location: keep:209ab994cae2c50c0f0f409cecfe0af4+5343
+  - class: Directory
+    location: keep:507185affe0707d6eb0269008717be79+4756
+  - class: Directory
+    location: keep:57af40c026feaf3da5ab7e095caeeae1+5725
+  - class: Directory
+    location: keep:3f515cba6b180bb7aa151ab05ce43270+5936
+  - class: Directory
+    location: keep:15350af160d548a437e45a1ca0432363+7786
+  - class: Directory
+    location: keep:dd76f2ee8fc0579b64e685d30e5d9922+5427
+  - class: Directory
+    location: keep:fde51804f15e0fea5a6bb7be37e1d262+6734
+  - class: Directory
+    location: keep:141fb2f192c4e1efdb6e373543022ab7+6568
+  - class: Directory
+    location: keep:2a96bea877d4a9cf25753c1298f34e58+6612
+  - class: Directory
+    location: keep:6edb216921b036a20cdd32583f2970b6+6022
+  - class: Directory
+    location: keep:f26422b6b05b8bb1e9f486e5c09051b2+5640
+  - class: Directory
+    location: keep:4ed2571eace3eb2963867ca835862646+6061
+  - class: Directory
+    location: keep:a5bf8756702a8f79723d3b134a8c6cff+5725
+  - class: Directory
+    location: keep:05fe61865950248bc6ed9f732426f42a+5385
+  - class: Directory
+    location: keep:e48587348ce4b238ad6594f3a862fca7+7832
+  - class: Directory
+    location: keep:802ba4f4f4a04e53f9e3120f5a871fd9+6902
+  - class: Directory
+    location: keep:22b66f6b397d2e051740d0b3b896c13c+5892
+  - class: Directory
+    location: keep:a6666076ab9bf6963e52d82206b17581+5429
+  - class: Directory
+    location: keep:ed99ae4b5448d5e998444cdc2d288c4f+5978
+  - class: Directory
+    location: keep:31e562eecd2259dc0404f83f138eb13a+6814
+  - class: Directory
+    location: keep:4b247a882829c85824ca49309e51f8b3+5470
+  - class: Directory
+    location: keep:4a3a45a029be557ceb627050b278404c+5097
+  - class: Directory
+    location: keep:96576082494eacc33d34891643247e16+5639
+  - class: Directory
+    location: keep:7849fd811c58ff9797956ca88885c072+5134
+  - class: Directory
+    location: keep:8281291d46712c4dee2929be01a8459d+5935
+  - class: Directory
+    location: keep:afdc5af01594e0e0372ab17287575db3+5427
+  - class: Directory
+    location: keep:aa783333788f5dd554055074ed4cb5ab+5384
+  - class: Directory
+    location: keep:21c8d76ef6ee9950cd2bb641b226a57b+5934
+  - class: Directory
+    location: keep:57a4338099666f13ab7cd05bad7c67c8+5892
+  - class: Directory
+    location: keep:04a1b83e91062b8c43eb3d470aaa6c64+5051
+  - class: Directory
+    location: keep:ad31d97aa3d355a666fe07da625f3994+6482
+  - class: Directory
+    location: keep:98acdba4fb52ac698eaad7449660227e+5517
+  - class: Directory
+    location: keep:03b7cd1daf28b6dcc913a45342f37c96+6482
+  - class: Directory
+    location: keep:60c42c1bad792d1d1ebc4c40420e8032+7030
+  - class: Directory
+    location: keep:3e7ef8e480273a67e223db2842d38e43+5513
+  - class: Directory
+    location: keep:b8ce59383ab761c76b35c91773409bf8+6692
+  - class: Directory
+    location: keep:2c7a882d3f13a0299baf866dc83872d7+7029
+  - class: Directory
+    location: keep:8740baf8f9730eff6d40a918a4c20f90+5384
+  - class: Directory
+    location: keep:8e63209016939215a48def1b350dee0a+6650
+  - class: Directory
+    location: keep:cee36503dcd257a70630396eab59e6c1+6481
+  - class: Directory
+    location: keep:d17a17d9fb4d05cbcadde06b99fae806+5430
+  - class: Directory
+    location: keep:c7f9b800e363290047d61904cc872c3e+5769
+  - class: Directory
+    location: keep:2dee32c1ab8b1fcc264458ae2609a18c+4887
+  - class: Directory
+    location: keep:9e18a67bc403b4d51ee4f556c597b689+5932
+  - class: Directory
+    location: keep:7bf3c8ef6a8ed7b4563569e1e4b85154+5051
+  - class: Directory
+    location: keep:cf3ca53fe3fe7955cd8993c9f2bdd24d+5682
+  - class: Directory
+    location: keep:8716d3eff15d14b0a072e481698fc715+6485
+  - class: Directory
+    location: keep:14b53d263217e13caf5755c66b8f9232+4884
+  - class: Directory
+    location: keep:df79fb7025f8706f20ed678e1916fd15+5424
+  - class: Directory
+    location: keep:e90c5745c169fc9f945463fcd065cdfd+4842
+  - class: Directory
+    location: keep:2e65619e3e557ae435b8b24cad86ce0e+6440
+  - class: Directory
+    location: keep:a67a94826b54eb78a4c3e582233579f8+6482
+  - class: Directory
+    location: keep:c184cd5457f7026ba8112492e3741036+6397
+  - class: Directory
+    location: keep:d5c5c607fd49300d94ecd7de39592eff+5177
+  - class: Directory
+    location: keep:3c7bf8b576bce2567590f90362ef0edf+5009
+  - class: Directory
+    location: keep:f83f53f0698ad52a1cb9b265a451eba8+5051
+  - class: Directory
+    location: keep:7680642fc1c8741d6657ca8b30675661+5013
+  - class: Directory
+    location: keep:55e6c7dc3edefb6625ee47ddfbe86f10+6648
+  - class: Directory
+    location: keep:0abaab02ff171c7a3d283ec54c845498+5515
+  - class: Directory
+    location: keep:dc949aef3a7959dc5259aa9e5caff0ac+6525
+  - class: Directory
+    location: keep:52f5abd360f99207bc7266f8f3b4e2ba+5512
+  - class: Directory
+    location: keep:3c0cb444429a6cf0be2ffc6e0359a345+6524
+  - class: Directory
+    location: keep:c6d33856d6620ed3b7dfcfaa9e4fa987+5343
+  - class: Directory
+    location: keep:84dc794f57a9fbf51f92a9add486702b+5341
+  - class: Directory
+    location: keep:f1b7173ab79d41035044f8ffa7ea5595+7956
+  - class: Directory
+    location: keep:7362951a09e4177e83af2ce779700ab8+6188
+  - class: Directory
+    location: keep:5f45b74d16fc04376ff3a16d30518ebb+5935
+  - class: Directory
+    location: keep:0a35ed7284d0851c7a2698026837c604+6900
+  - class: Directory
+    location: keep:0f3da67b2ad0df2886e7fe1e1c1b5338+6777
+  - class: Directory
+    location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893
diff --git a/cwl/lightning/yml/lightning-export-numpy-merged.yml b/cwl/lightning/yml/lightning-export-numpy-merged.yml
new file mode 100644 (file)
index 0000000..471a89f
--- /dev/null
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+lib:
+  class: File
+  location: keep:921ce01c909acc0068b820ca48d3fcde+1542/library.gob.gz
+chunks: 1
diff --git a/cwl/lightning/yml/lightning-import-ref37.yml b/cwl/lightning/yml/lightning-import-ref37.yml
new file mode 100644 (file)
index 0000000..b092cde
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+saveincomplete: "true"
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+fastadirs:
+  - class: Directory
+    location: keep:3cf4b6ed2bf8cd3abc27cb5a79641a86+755
diff --git a/cwl/lightning/yml/lightning-import-ref38.yml b/cwl/lightning/yml/lightning-import-ref38.yml
new file mode 100644 (file)
index 0000000..fbb4922
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+saveincomplete: "true"
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+fastadir:
+  class: Directory
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761
diff --git a/cwl/lightning/yml/lightning-import-testdata.yml b/cwl/lightning/yml/lightning-import-testdata.yml
new file mode 100644 (file)
index 0000000..3f04854
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+saveincomplete: "false"
+tagset:
+  class: File
+  location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz
+fastadirs:
+  - class: Directory
+    location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv
+  - class: Directory
+    location: keep:2xpu4-4zz18-bwlg17dkckptebn
+  - class: Directory
+    location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb
diff --git a/cwl/lightning/yml/lightning-merge-testdata_ref38.yml b/cwl/lightning/yml/lightning-merge-testdata_ref38.yml
new file mode 100644 (file)
index 0000000..6d4e416
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+lib1:
+  class: File
+  location: keep:9ecc22a5a7820128bb1fbf1b457848a9+1525/library.gob.gz
+lib2:
+  class: File
+  location: keep:2a134f80896100f67fbb0a1d9dfb3ee8+1107/library.gob.gz
diff --git a/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml
new file mode 100644 (file)
index 0000000..e7d9d35
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: ""
+libdir:
+  class: Directory
+  location: keep:37bfc41e8ce21b8c1719cf4790f454c6+656483
+regions: null
+threads: 10
+mergeoutput: "false"
+expandregions: 0
diff --git a/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml
new file mode 100644 (file)
index 0000000..e5affaa
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: ""
+libdir:
+  class: Directory
+  location: keep:0eb748be8b4ce392eb405b71199a2ef1+54032
+regions: null
+threads: 10
+mergeoutput: "false"
+expandregions: 0
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml
new file mode 100644 (file)
index 0000000..36ceedc
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: "A-ADC"
+libdir:
+  class: Directory
+  location: keep:5b615d7692137b25e0411801e7c37b0c+656435
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml
new file mode 100644 (file)
index 0000000..bb65d52
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: "A-CUHS"
+libdir:
+  class: Directory
+  location: keep:5b615d7692137b25e0411801e7c37b0c+656435
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml
new file mode 100644 (file)
index 0000000..1f95cbc
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: "A-IIAA"
+libdir:
+  class: Directory
+  location: keep:5b615d7692137b25e0411801e7c37b0c+656435
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml
new file mode 100644 (file)
index 0000000..d4f795d
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: "ADNI"
+libdir:
+  class: Directory
+  location: keep:5b615d7692137b25e0411801e7c37b0c+656435
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml
new file mode 100644 (file)
index 0000000..7b91088
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: ""
+libdir:
+  class: Directory
+  location: keep:5b615d7692137b25e0411801e7c37b0c+656435
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml
new file mode 100644 (file)
index 0000000..d991c5b
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+matchgenome: ""
+libdir:
+  class: Directory
+  location: keep:25d3137ac3d35c5600a1b81ad8b64ec3+656017
+threads: 10
+mergeoutput: "false"
+expandregions: 0
+phenotypesdir:
+  class: Directory
+  location: keep:9b76b33c948bba77f2bf58f915cff9f4+120
diff --git a/cwl/lightning/yml/lightning-tiling-stats-ref37.yml b/cwl/lightning/yml/lightning-tiling-stats-ref37.yml
new file mode 100644 (file)
index 0000000..71171d5
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+libdir:
+  class: Directory
+  location: keep:3ae12ac1758199f567ff4ce0e41d72cc+1107
diff --git a/cwl/preprocess/cgivar/bedtools-intersect.cwl b/cwl/preprocess/cgivar/bedtools-intersect.cwl
new file mode 100644 (file)
index 0000000..b3b0bbe
--- /dev/null
@@ -0,0 +1,45 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: Intersect VCF with BED
+requirements:
+  ShellCommandRequirement: {}  # the "|", ">" and "&&" arguments below are handed to a shell unquoted
+inputs:
+  vcf:
+    type: File
+    label: Input VCF
+  bed:
+    type: File
+    label: Input BED
+outputs:
+  vcfgz:
+    type: File
+    label: Output VCF with records inside the BED region
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # index file written by the trailing tabix command
+baseCommand: [bedtools, intersect]
+arguments:
+  - "-header"
+  - prefix: "-a"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-b"
+    valueFrom: $(inputs.bed)
+  - prefix: "-f"
+    valueFrom: "1"  # NOTE(review): presumably requires each VCF record to overlap the BED over its full length -- confirm in bedtools docs
+  - shellQuote: false
+    valueFrom: "|"  # pipe the bedtools output into bgzip
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  - $(inputs.vcf.basename).gz  # for an input named x.vcf this is x.vcf.gz, which the "*.vcf.gz" glob collects
+  - shellQuote: false
+    valueFrom: "&&"  # tabix runs only when the preceding pipeline succeeds
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.vcf.basename).gz
diff --git a/cwl/preprocess/cgivar/cgatools-mkvcf.cwl b/cwl/preprocess/cgivar/cgatools-mkvcf.cwl
new file mode 100644 (file)
index 0000000..fc6bd02
--- /dev/null
@@ -0,0 +1,29 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: Convert CGIVAR to VCF
+inputs:
+  cgascript:
+    type: File
+    label: Script invoking cgatools
+  reference:
+    type: File
+    label: CRR reference used for cgatools
+  cgivar:
+    type: File
+    label: Input CGIVAR
+  sample:
+    type: string
+    label: Sample name  # only used to name the stdout file; it is not passed to the script
+outputs:
+  vcf:
+    type: stdout
+    label: Output VCF
+arguments:  # there is no baseCommand, so the script file is the first word of the command line
+  - $(inputs.cgascript)
+  - $(inputs.reference)  # read as $1 by cgatools-mkvcf.sh
+  - $(inputs.cgivar)  # read as $2 by cgatools-mkvcf.sh
+stdout: $(inputs.sample).vcf
diff --git a/cwl/preprocess/cgivar/cgatools-mkvcf.sh b/cwl/preprocess/cgivar/cgatools-mkvcf.sh
new file mode 100755 (executable)
index 0000000..4ddf24c
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Usage: cgatools-mkvcf.sh REFERENCE CGIVAR   (cgatools-mkvcf.cwl captures stdout as the VCF)
+REFERENCE=$1
+CGIVAR=$2
+# "|| true" is kept from the original: a cgatools failure never fails this script.
+cgatools mkvcf --beta --reference "$REFERENCE" --include-no-calls --field-names GT,GQ,DP,AD --source-names masterVar --master-var "$CGIVAR" || true
diff --git a/cwl/preprocess/cgivar/cgivar2vcfbed-wf.cwl b/cwl/preprocess/cgivar/cgivar2vcfbed-wf.cwl
new file mode 100644 (file)
index 0000000..0628a08
--- /dev/null
@@ -0,0 +1,69 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: Workflow
+label: Convert CGIVAR to VCF and BED region
+requirements:
+  arv:RunInSingleContainer: {}
+hints:
+  DockerRequirement:
+    dockerPull: cgivar2vcfbed
+  ResourceRequirement:
+    ramMin: 12000
+inputs:
+  cgivar:
+    type: File
+    label: Input CGIVAR
+  sample:
+    type: string
+    label: Sample name
+  reference:
+    type: File
+    label: CRR reference used for cgatools
+  cgascript:
+    type: File
+    label: Script invoking cgatools
+  fixscript:
+    type: File
+    label: Script to fix VCF
+
+outputs:
+  vcfgz:
+    type: File
+    label: Output VCF
+    outputSource: bedtools-intersect/vcfgz
+  bed:
+    type: File
+    label: BED region VCF
+    outputSource: gvcf_regions/bed
+
+steps:
+  cgatools-mkvcf:  # CGIVAR -> VCF, by running cgascript
+    run: cgatools-mkvcf.cwl
+    in:
+      cgascript: cgascript
+      reference: reference
+      cgivar: cgivar
+      sample: sample
+    out: [vcf]
+  fix_vcf:  # passes that VCF through fixscript
+    run: fix_vcf.cwl
+    in:
+      fixscript: fixscript
+      vcf: cgatools-mkvcf/vcf
+    out: [fixedvcf]
+  gvcf_regions:  # derives the BED from the fixed VCF; this BED is also a workflow output
+    run: gvcf_regions.cwl
+    in:
+      vcf: fix_vcf/fixedvcf
+    out: [bed]
+  bedtools-intersect:  # intersects the fixed VCF with that BED; yields the workflow's vcfgz
+    run: bedtools-intersect.cwl
+    in:
+      vcf: fix_vcf/fixedvcf
+      bed: gvcf_regions/bed
+    out: [vcfgz]
diff --git a/cwl/preprocess/cgivar/fix_vcf.cwl b/cwl/preprocess/cgivar/fix_vcf.cwl
new file mode 100644 (file)
index 0000000..29e2a78
--- /dev/null
@@ -0,0 +1,24 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: Fix VCF with an extra period in the INFO field
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  fixscript:
+    type: File
+    label: Script to fix VCF
+  vcf:
+    type: File
+    label: Input VCF
+outputs:
+  fixedvcf:
+    type: stdout
+    label: Fixed VCF
+arguments:  # no baseCommand: the script file is executed directly, with the VCF as its only argument
+  - $(inputs.fixscript)
+  - $(inputs.vcf)
+stdout: $(inputs.vcf.nameroot).vcf  # input basename minus its last extension, plus ".vcf"
diff --git a/cwl/preprocess/cgivar/fix_vcf.py b/cwl/preprocess/cgivar/fix_vcf.py
new file mode 100755 (executable)
index 0000000..dc21275
--- /dev/null
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+# Print a VCF to stdout, removing '.' from the END entry of each record's INFO column.
+
+import sys
+
+def is_header(line):
+    """Return True when the line begins with '#'."""
+    # Slicing keeps this safe for an empty string.
+    return line[:1] == '#'
+
+def has_END(line):
+    """Return True when the substring 'END=' occurs anywhere in the line."""
+    # find() yields -1 only when the substring is absent.
+    return line.find('END=') != -1
+
+# FIELD index
+# CHROM 0, POS 1, REF 3, QUAL 5, INFO 7, FORMAT 8, sample 9
+
+def fix_END(line):
+    """Return the line with every '.' removed from the END entry of its INFO column (index 7)."""
+    columns = line.split('\t')
+    cleaned = []
+    for entry in columns[7].split(';'):
+        # Only the entry keyed 'END' is rewritten; all other INFO entries are kept as they are.
+        if entry.split('=')[0] == 'END':
+            entry = entry.replace('.', '')
+        cleaned.append(entry)
+
+    columns[7] = ';'.join(cleaned)
+
+    return '\t'.join(columns)
+
+if __name__ == '__main__':
+    # Usage: fix_vcf.py INPUT.vcf  -- the fixed VCF is written to stdout.
+    vcf = sys.argv[1]
+    with open(vcf) as g:
+        for line in g:
+            # Header lines are never rewritten, even when they contain 'END='.
+            if not is_header(line) and has_END(line):
+                line = fix_END(line)
+            # A parenthesised single-argument print runs on both Python 2 and 3.
+            print(line.strip())
diff --git a/cwl/preprocess/cgivar/getfiles.cwl b/cwl/preprocess/cgivar/getfiles.cwl
new file mode 100644 (file)
index 0000000..918a72f
--- /dev/null
@@ -0,0 +1,34 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of CGIVARs to process
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of CGIVARs
+outputs:
+  cgivars:
+    type: File[]
+    label: Output CGIVARs
+  samples:
+    type: string[]
+    label: Sample names of CGIVARs
+requirements:
+  InlineJavascriptRequirement: {}
+expression: |
+  ${
+    // Keep every entry of the directory listing whose extension is ".bz2";
+    // entries with any other extension are ignored.
+    var picked = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".bz2";
+    });
+    // The sample name is the part of the basename before its first ".",
+    // so names[i] always belongs to picked[i].
+    var names = picked.map(function (entry) {
+      return entry.basename.split(".")[0];
+    });
+    return {"cgivars": picked, "samples": names};
+  }
diff --git a/cwl/preprocess/cgivar/gvcf_regions.cwl b/cwl/preprocess/cgivar/gvcf_regions.cwl
new file mode 100644 (file)
index 0000000..f866543
--- /dev/null
@@ -0,0 +1,20 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: Extract called region from GVCF
+inputs:
+  vcf:
+    type: File
+    label: Input GVCF
+outputs:
+  bed:
+    type: stdout
+    label: BED region of GVCF
+baseCommand: /gvcf_regions/gvcf_regions.py  # NOTE(review): absolute path, presumably shipped inside the Docker image -- confirm
+arguments:
+  - $(inputs.vcf)
+  - "--unreported_is_called"
+stdout: $(inputs.vcf.nameroot).bed  # the command's stdout is captured under this name as the bed output
diff --git a/cwl/preprocess/cgivar/scatter-cgivar2vcfbed-wf.cwl b/cwl/preprocess/cgivar/scatter-cgivar2vcfbed-wf.cwl
new file mode 100644 (file)
index 0000000..4984e0c
--- /dev/null
@@ -0,0 +1,62 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to convert CGIVARs to VCFs and BEDs
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: cgivar2vcfbed
+inputs:
+  cgivarsdir:
+    type: Directory
+    label: Input directory of CGIVARs
+  reference:
+    type: File
+    label: CRR reference used for cgatools
+  cgascript:
+    type: File
+    label: Script invoking cgatools
+    default:
+      class: File
+      location: cgatools-mkvcf.sh
+  fixscript:
+    type: File
+    label: Script to fix VCF
+    default:
+      class: File
+      location: fix_vcf.py
+
+outputs:
+  vcfgzs:
+    type: File[]
+    label: Output VCFs
+    outputSource: cgivar2vcfbed-wf/vcfgz
+  beds:
+    type: File[]
+    label: Output BEDs
+    outputSource: cgivar2vcfbed-wf/bed
+
+steps:
+  getfiles:  # collects the ".bz2" files of cgivarsdir plus one sample name per file
+    run: getfiles.cwl
+    in:
+      dir: cgivarsdir
+    out: [cgivars, samples]
+  cgivar2vcfbed-wf:
+    run: cgivar2vcfbed-wf.cwl
+    scatter: [cgivar, sample]
+    scatterMethod: dotproduct  # cgivars[i] is paired with samples[i]; the remaining inputs are shared by every run
+    in:
+      cgivar: getfiles/cgivars
+      sample: getfiles/samples
+      reference: reference
+      cgascript: cgascript
+      fixscript: fixscript
+    out: [vcfgz, bed]
diff --git a/cwl/preprocess/cgivar/yml/cgivar2vcfbed-wf-test.yml b/cwl/preprocess/cgivar/yml/cgivar2vcfbed-wf-test.yml
new file mode 100644 (file)
index 0000000..22cc7dd
--- /dev/null
@@ -0,0 +1,17 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cgivar:
+  class: File
+  location: keep:c9262dd71f8421735b3dde4e1b5b342f+86993/hu011C57_var-GS000015172-ASM.tsv.bz2
+sample: "hu011C57_var-GS000015172-ASM"
+reference:
+  class: File
+  location: keep:320d7a6717aa7b555a09e52976ba4a02+18534/build37.crr
+cgascript:
+  class: File
+  location: ../cgatools-mkvcf.sh
+fixscript:
+  class: File
+  location: ../fix_vcf.py
diff --git a/cwl/preprocess/cgivar/yml/scatter-cgivar2vcfbed-wf-367set.yml b/cwl/preprocess/cgivar/yml/scatter-cgivar2vcfbed-wf-367set.yml
new file mode 100644 (file)
index 0000000..996a373
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cgivarsdir:
+  class: Directory
+  location: keep:c9262dd71f8421735b3dde4e1b5b342f+86993
+reference:
+  class: File
+  location: keep:320d7a6717aa7b555a09e52976ba4a02+18534/build37.crr
diff --git a/cwl/preprocess/chrmvcf/change_gt_M.js b/cwl/preprocess/chrmvcf/change_gt_M.js
new file mode 100644 (file)
index 0000000..5d8e723
--- /dev/null
@@ -0,0 +1,16 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+function record() {
+  if (CHROM != 'M') return;  // other contigs: nothing is modified and nothing is returned
+  var gt = SAMPLES[0].GT;
+  var slash = gt.indexOf('/') != -1, bar = gt.indexOf('|') != -1;
+  if (!slash && !bar) {
+    SAMPLES[0].GT = gt + "/" + gt;  // separator-less GT "x" becomes "x/x"
+  } else if (slash && gt.split('/')[0] != gt.split('/')[1]) {
+    return false;  // the first two '/'-separated alleles differ
+  } else if (bar && gt.split('|')[0] != gt.split('|')[1]) {
+    return false;  // the first two '|'-separated alleles differ
+  }
+}
diff --git a/cwl/preprocess/chrmvcf/change_gt_chrM.js b/cwl/preprocess/chrmvcf/change_gt_chrM.js
new file mode 100644 (file)
index 0000000..3b38c92
--- /dev/null
@@ -0,0 +1,16 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+function record() {
+  if (CHROM == 'chrM') {
+    var inputGT = SAMPLES[0].GT;
+    if (inputGT.indexOf('/') == -1 && inputGT.indexOf('|') == -1 ) {
+      SAMPLES[0].GT = inputGT + "/" + inputGT;
+    } else if (inputGT.indexOf('/') != -1 && inputGT.split('/')[0] != inputGT.split('/')[1]) {
+      return false;
+    } else if (inputGT.indexOf('|') != -1 && inputGT.split('|')[0] != inputGT.split('|')[1]) {
+      return false;
+    }
+  }
+}
diff --git a/cwl/preprocess/chrmvcf/fixchrm-wf.cwl b/cwl/preprocess/chrmvcf/fixchrm-wf.cwl
new file mode 100644 (file)
index 0000000..52878fd
--- /dev/null
@@ -0,0 +1,36 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Workflow: expand `vcfdir` into a list of files with getfiles.cwl, then run
+# fixchrm.cwl once per listed file (scatter), handing the same `filterjs`
+# script to every run.
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to fix VCF by processing chrM
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  vcfdir:
+    type: Directory
+    label: Input VCF directory
+  filterjs:
+    type: File
+    label: Javascript code for filtering
+
+outputs:
+  # One output file per scattered fixchrm run.
+  fixedvcfs:
+    type: File[]
+    label: Fixed VCFs
+    outputSource: fixchrm/fixedvcf
+
+steps:
+  # Turn the input directory into the array of files to process.
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfdir
+    out: [vcfs]
+  # Scattered over `vcf`: one run per element of getfiles/vcfs.
+  fixchrm:
+    run: fixchrm.cwl
+    scatter: [vcf]
+    in:
+      vcf: getfiles/vcfs
+      filterjs: filterjs
+    out: [fixedvcf]
diff --git a/cwl/preprocess/chrmvcf/fixchrm.cwl b/cwl/preprocess/chrmvcf/fixchrm.cwl
new file mode 100644 (file)
index 0000000..5c06585
--- /dev/null
@@ -0,0 +1,41 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: runs
+#   rtg vcffilter -i <vcf> -o <basename of vcf> --javascript <filterjs>
+# in the `vcfutil` image and collects the file matching "*vcf.gz" from the
+# working directory, together with its ".tbi" secondary file.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Fix VCF by processing chrM
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  filterjs:
+    type: File
+    label: Javascript code for filtering
+outputs:
+  fixedvcf:
+    type: File
+    label: Fixed VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: [rtg, vcffilter]
+arguments:
+  - prefix: "-i"
+    valueFrom: $(inputs.vcf)
+  # The output reuses the input's basename, so it lands in the working
+  # directory under the same file name as the input.
+  - prefix: "-o"
+    valueFrom: $(inputs.vcf.basename)
+  - prefix: "--javascript"
+    valueFrom: $(inputs.filterjs)
diff --git a/cwl/preprocess/chrmvcf/getfiles.cwl b/cwl/preprocess/chrmvcf/getfiles.cwl
new file mode 100644 (file)
index 0000000..60dd213
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool: select the entries of `dir` whose extension is ".gz" and
+# return them, in listing order, as the `vcfs` array.
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs to process
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfs:
+    type: File[]
+    label: Output VCFs
+expression: |
+  ${
+    // Keep only the listing entries that end in ".gz".
+    var selected = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".gz";
+    });
+    return {"vcfs": selected};
+  }
diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-test.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-test.yml
new file mode 100644 (file)
index 0000000..e34ed82
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:61ee4a11243384b7f7dfc76291edcab8+85964/filtered_hu01F73B_var-GS000037833-ASM.vcf.gz
+filterjs:
+  class: File
+  location: ../change_gt_M.js
diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml
new file mode 100644 (file)
index 0000000..922bdc1
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:ff0df425ebf968cf38ec0574940040e2+122387
+filterjs:
+  class: File
+  location: ../change_gt_M.js
diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml
new file mode 100644 (file)
index 0000000..5253959
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:61ee4a11243384b7f7dfc76291edcab8+85964
+filterjs:
+  class: File
+  location: ../change_gt_M.js
diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml
new file mode 100644 (file)
index 0000000..656ef63
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:73a438da5ff6bb4a3956ae1a05e574a0+84041
+filterjs:
+  class: File
+  location: ../change_gt_M.js
diff --git a/cwl/preprocess/gvcf/filtercleangvcf-wf.cwl b/cwl/preprocess/gvcf/filtercleangvcf-wf.cwl
new file mode 100644 (file)
index 0000000..50f3016
--- /dev/null
@@ -0,0 +1,48 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Workflow: list the gVCFs in `gvcfdir` with getfiles.cwl, then run
+# filtercleangvcf.cwl once per gVCF (scatter) with the shared `cutoff` and
+# optional `keepgqdot` settings.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: Workflow
+label: Filters gVCFs by a specified quality cutoff and cleans
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+
+inputs:
+  gvcfdir:
+    type: Directory
+    label: Input gVCF directory
+  cutoff:
+    type: int
+    label: Filtering cutoff threshold
+  # Optional; passed through unchanged to every filtercleangvcf run.
+  keepgqdot:
+    type: boolean?
+    label: Flag for keeping GQ represented by "."
+
+outputs:
+  # One output file per scattered filtercleangvcf run.
+  filteredcleangvcfs:
+    type: File[]
+    label: Filtered clean gVCFs
+    outputSource: filtercleangvcf/filteredcleangvcf
+
+steps:
+  # Turn the input directory into the array of gVCFs to process.
+  getfiles:
+    run: getfiles.cwl
+    in:
+      gvcfdir: gvcfdir
+    out: [gvcfs]
+
+  # Scattered over `gvcf`: one run per element of getfiles/gvcfs.
+  filtercleangvcf:
+    run: filtercleangvcf.cwl
+    scatter: gvcf
+    in:
+      gvcf: getfiles/gvcfs
+      keepgqdot: keepgqdot
+      cutoff: cutoff
+    out: [filteredcleangvcf]
diff --git a/cwl/preprocess/gvcf/filtercleangvcf.cwl b/cwl/preprocess/gvcf/filtercleangvcf.cwl
new file mode 100644 (file)
index 0000000..a81351d
--- /dev/null
@@ -0,0 +1,75 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: builds the shell pipeline
+#   zcat <gvcf> | <filtergvcf> [-k] <cutoff> | <cleanvcf> | bgzip -c > <nameroot>.gz
+#     && tabix -p vcf <nameroot>.gz
+# and collects the file matching "*vcf.gz" together with its ".tbi".
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Filters gVCFs by a specified quality cutoff and cleans
+requirements:
+  DockerRequirement:
+    dockerPull: arvados/l7g
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+  # Needed for the unquoted "|", ">" and "&&" arguments below.
+  ShellCommandRequirement: {}
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  gvcf:
+    type: File
+    label: Input gVCF file
+    secondaryFiles: [.tbi]
+  # Defaults to the script shipped next to this file.
+  filtergvcf:
+    type: File
+    label: Code that filters gVCFs
+    default:
+      class: File
+      location: src/filter-gvcf
+  cutoff:
+    type: int
+    label: Filtering cutoff threshold
+  keepgqdot:
+    type: boolean?
+    label: Flag for keeping GQ represented by "."
+  # Defaults to the script shipped next to this file.
+  cleanvcf:
+    type: File
+    label: Code that cleans gVCFs
+    default:
+      class: File
+      location: src/cleanvcf.py
+outputs:
+  filteredcleangvcf:
+    type: File
+    label: Filtered and clean gVCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: zcat
+# The order of the entries below is the order of the words on the command line.
+arguments:
+  - $(inputs.gvcf)
+  - shellQuote: false
+    valueFrom: "|"
+  # NOTE(review): the script is used directly as a command word, so it
+  # presumably has to be executable and start with a shebang line -- confirm.
+  - $(inputs.filtergvcf)
+  - prefix: "-k"
+    valueFrom: $(inputs.keepgqdot)
+  - $(inputs.cutoff)
+  - shellQuote: false
+    valueFrom: "|"
+  # NOTE(review): same direct-execution assumption as for filtergvcf.
+  - $(inputs.cleanvcf)
+  - shellQuote: false
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  # Output name: the input's nameroot with ".gz" appended.
+  - $(inputs.gvcf.nameroot).gz
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.gvcf.nameroot).gz
diff --git a/cwl/preprocess/gvcf/getfiles.cwl b/cwl/preprocess/gvcf/getfiles.cwl
new file mode 100644 (file)
index 0000000..808da73
--- /dev/null
@@ -0,0 +1,41 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool: turn the listing of `gvcfdir` into a File[] holding every
+# entry whose extension is ".gz". When the same listing also contains
+# "<basename>.tbi", that entry is attached as the file's secondaryFiles.
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool
+label: Create list of gVCFs from directory
+cwlVersion: v1.0
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  gvcfdir:
+    type: Directory
+    label: Directory of input gVCFs
+outputs:
+  gvcfs:
+    type: File[]
+    label: Array of gvcfs
+    secondaryFiles: [.tbi]
+expression: |
+  ${
+    var listing = inputs.gvcfdir.listing;
+
+    // Index the listing by basename once, so that finding the index of each
+    // gVCF is a single lookup instead of a rescan of the whole directory.
+    var byName = {};
+    for (var i = 0; i < listing.length; i++) {
+      byName[listing[i].basename] = listing[i];
+    }
+
+    var gvcfs = [];
+    for (var j = 0; j < listing.length; j++) {
+      var entry = listing[j];
+      if (entry.nameext == '.gz') {
+        var tbiName = entry.basename + ".tbi";
+        // Own-property check so inherited Object members never match.
+        if (Object.prototype.hasOwnProperty.call(byName, tbiName)) {
+          entry.secondaryFiles = [byName[tbiName]];
+        }
+        gvcfs.push(entry);
+      }
+    }
+    return {"gvcfs": gvcfs};
+  }
diff --git a/cwl/preprocess/gvcf/src/cleanvcf.py b/cwl/preprocess/gvcf/src/cleanvcf.py
new file mode 100755 (executable)
index 0000000..c6fb0c5
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/usr/bin/env python
+
+from __future__ import print_function
+import sys
+
+def is_header(line):
+    """Check if a line is header."""
+
+    return line.startswith('#')
+
+# FIELD index
+# CHROM 0, POS 1, REF 3
+
+def main():
+    previous_CHROM = ""
+    previous_end_POS = 0
+
+    for line in sys.stdin:
+        if not is_header(line):
+            fields = line.split('\t')
+            CHROM = fields[0]
+            POS = int(fields[1])
+            REF = fields[3]
+            if CHROM == previous_CHROM:
+                if POS > previous_end_POS:
+                    print(line, end='')
+                    previous_end_POS = max(previous_end_POS, POS + len(REF) - 1)
+            else:
+                print(line, end='')
+                previous_end_POS = POS + len(REF) - 1
+            previous_CHROM = CHROM
+        else:
+            print(line, end='')
+
+if __name__ == '__main__':
+    main()
diff --git a/cwl/preprocess/gvcf/src/filter-gvcf b/cwl/preprocess/gvcf/src/filter-gvcf
new file mode 100755 (executable)
index 0000000..ff710fd
--- /dev/null
@@ -0,0 +1,60 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/usr/bin/python
+
+import argparse
+import sys
+
+
+def filter_gvcf():
+
+    # setting up inputs
+    parser = argparse.ArgumentParser(prog="filter-gvcf", description="Filter a \
+                                    gVCF with a user-set quality threshold.")
+    parser.add_argument("-k", "--keepGQdot", help="Keeps the variant when GQ \
+                        is '.'", action="store_true")
+    parser.add_argument("quality_threshold", metavar="QUALITY", help="Quality \
+                        threshold", type=int)
+    parser.add_argument("gvcf", metavar="GVCF", nargs='?',
+                        type=argparse.FileType('r'), default=sys.stdin, help="\
+                        Input gVCF to filter from stdin")
+
+    args = parser.parse_args()
+    keepGQdot = args.keepGQdot
+    quality_threshold = args.quality_threshold
+    gvcf = args.gvcf
+
+    for line in gvcf:
+        line = line.strip()
+
+        # retain header and info lines
+        if len(line) == 0:
+            continue
+        if line[0] == '#':
+            print line
+            continue
+
+        fields = line.split('\t')
+
+        if len(fields) < 10:
+            continue
+
+        FORMAT_fields = fields[8].split(":")
+        sample_fields = fields[9].split(":")
+
+        # filter quality scores below the threshold
+        try:
+            GQ_index = FORMAT_fields.index('GQ')
+            GQ = sample_fields[GQ_index]
+            if GQ.isdigit():
+                if quality_threshold <= int(GQ):
+                    print line
+            elif GQ == "." and keepGQdot:
+                print line
+        except ValueError:
+            print line
+
+if __name__ == '__main__':
+    filter_gvcf()
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml
new file mode 100644 (file)
index 0000000..80b6f87
--- /dev/null
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcf:
+  class: File
+  location: keep:2756bb4524567aebf16a002e94c2e407+14977/A-CUHS-CU010093-BL-COL-44045BL1.vcf.gz
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml
new file mode 100644 (file)
index 0000000..ad7f570
--- /dev/null
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:c9636e41a02d673ecfa52af1db1dbd6e+122394
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml
new file mode 100644 (file)
index 0000000..619ddca
--- /dev/null
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:3c770879900f335fe58be1d9e6b42420+1353
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/keepGQdot-test.yml b/cwl/preprocess/gvcf/yml/keepGQdot-test.yml
new file mode 100644 (file)
index 0000000..78babbc
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:47aa578f6360c9dfb10936d21571d6c7+1197
+cutoff: 20
+keepgqdot: true
diff --git a/cwl/preprocess/haploidvcf/change_gt.js b/cwl/preprocess/haploidvcf/change_gt.js
new file mode 100644 (file)
index 0000000..fdbb659
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+function record() {
+  var inputGT = SAMPLES[0].GT;
+  if (inputGT.indexOf('/') == -1 && inputGT.indexOf('|') == -1 ) {
+    SAMPLES[0].GT = inputGT + "/" + inputGT;
+  } else if (CHROM == 'chrM' && inputGT.indexOf('/') != -1) {
+    return inputGT.split('/')[0] == inputGT.split('/')[1];
+  } else if (CHROM == 'chrM' && inputGT.indexOf('|') != -1) {
+    return inputGT.split('|')[0] == inputGT.split('|')[1];
+  }
+}
diff --git a/cwl/preprocess/haploidvcf/fixgt-wf.cwl b/cwl/preprocess/haploidvcf/fixgt-wf.cwl
new file mode 100644 (file)
index 0000000..54e576c
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Workflow: expand `vcfdir` into a list of files with getfiles.cwl, then run
+# fixgt.cwl once per listed file (scatter).
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to fix VCF by changing haploid calls and processing chrM
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  vcfdir:
+    type: Directory
+    label: Input VCF directory
+
+outputs:
+  # One output file per scattered fixgt run.
+  fixedvcfs:
+    type: File[]
+    label: Fixed VCFs
+    outputSource: fixgt/fixedvcf
+
+steps:
+  # Turn the input directory into the array of files to process.
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfdir
+    out: [vcfs]
+  # Scattered over `vcf`; fixgt.cwl's `filterjs` input is left at its default.
+  fixgt:
+    run: fixgt.cwl
+    scatter: [vcf]
+    in:
+      vcf: getfiles/vcfs
+    out: [fixedvcf]
diff --git a/cwl/preprocess/haploidvcf/fixgt.cwl b/cwl/preprocess/haploidvcf/fixgt.cwl
new file mode 100644 (file)
index 0000000..974e764
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: builds the shell pipeline
+#   zcat <vcf> | grep -v "Locus GQX is less than 6 for hom deletion"
+#     | rtg vcffilter -i - -o <basename of vcf> --javascript <filterjs>
+# and collects the file matching "*vcf.gz" together with its ".tbi".
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Fix VCF by changing haploid calls and processing chrM
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+  # Needed for the unquoted "|" arguments below.
+  ShellCommandRequirement: {}
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  # Defaults to the script shipped next to this file.
+  filterjs:
+    type: File
+    label: Javascript code for filtering
+    default:
+      class: File
+      location: change_gt.js
+outputs:
+  fixedvcf:
+    type: File
+    label: Fixed VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: zcat
+# The order of the entries below is the order of the words on the command line.
+arguments:
+  - $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: "|"
+  # Removes every line (header or record) containing this exact text.
+  - "grep"
+  - "-v"
+  - "Locus GQX is less than 6 for hom deletion"
+  - shellQuote: False
+    valueFrom: "|"
+  - "rtg"
+  - "vcffilter"
+  # "-" as the input name: the VCF arrives on the pipe from grep.
+  - prefix: "-i"
+    valueFrom: "-"
+  - prefix: "-o"
+    valueFrom: $(inputs.vcf.basename)
+  - prefix: "--javascript"
+    valueFrom: $(inputs.filterjs)
diff --git a/cwl/preprocess/haploidvcf/getfiles.cwl b/cwl/preprocess/haploidvcf/getfiles.cwl
new file mode 100644 (file)
index 0000000..60dd213
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool: select the entries of `dir` whose extension is ".gz" and
+# return them, in listing order, as the `vcfs` array.
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs to process
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfs:
+    type: File[]
+    label: Output VCFs
+expression: |
+  ${
+    // Keep only the listing entries that end in ".gz".
+    var selected = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".gz";
+    });
+    return {"vcfs": selected};
+  }
diff --git a/cwl/preprocess/haploidvcf/yml/fixgt-test.yml b/cwl/preprocess/haploidvcf/yml/fixgt-test.yml
new file mode 100644 (file)
index 0000000..f96fc2e
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:35e1d520788da317b9e51ebb52047eeb+28239/filtered_PGPC_0001_S1.genome.vcf.gz
diff --git a/cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml b/cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml
new file mode 100644 (file)
index 0000000..8a76b31
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:35e1d520788da317b9e51ebb52047eeb+28239
diff --git a/cwl/preprocess/nonrefvcf/fixnonref-wf.cwl b/cwl/preprocess/nonrefvcf/fixnonref-wf.cwl
new file mode 100644 (file)
index 0000000..daf4d17
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Workflow: expand `vcfdir` into a list of files with getfiles.cwl, then run
+# fixnonref.cwl once per listed file (scatter).
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to fix VCF by removing GT fields that point to <NON_REF> and processing chrM
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  vcfdir:
+    type: Directory
+    label: Input VCF directory
+
+outputs:
+  # One output file per scattered fixnonref run.
+  fixedvcfs:
+    type: File[]
+    label: Fixed VCFs
+    outputSource: fixnonref/fixedvcf
+
+steps:
+  # Turn the input directory into the array of files to process.
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfdir
+    out: [vcfs]
+  # Scattered over `vcf`; fixnonref.cwl's `filterjs` input is left at its default.
+  fixnonref:
+    run: fixnonref.cwl
+    scatter: [vcf]
+    in:
+      vcf: getfiles/vcfs
+    out: [fixedvcf]
diff --git a/cwl/preprocess/nonrefvcf/fixnonref.cwl b/cwl/preprocess/nonrefvcf/fixnonref.cwl
new file mode 100644 (file)
index 0000000..c32381c
--- /dev/null
@@ -0,0 +1,46 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: runs
+#   rtg vcffilter -i <vcf> -o <basename of vcf> --keep-expr <expr> --javascript <filterjs>
+# in the `vcfutil` image and collects the file matching "*vcf.gz" together
+# with its ".tbi" secondary file.
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Fix VCF by removing GT fields that point to <NON_REF> and processing chrM
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  # Defaults to the chrM script kept in the sibling chrmvcf directory.
+  filterjs:
+    type: File
+    label: Javascript code for filtering
+    default:
+      class: File
+      location: ../chrmvcf/change_gt_chrM.js
+outputs:
+  fixedvcf:
+    type: File
+    label: Fixed VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: [rtg, vcffilter]
+arguments:
+  - prefix: "-i"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-o"
+    valueFrom: $(inputs.vcf.basename)
+  # True when the record has exactly one ALT, or when the first sample's GT
+  # string does not contain ALT.length written as a string.
+  # NOTE(review): presumably ALT.length is the allele index of the trailing
+  # <NON_REF> entry -- confirm against the input VCFs.
+  - prefix: "--keep-expr"
+    valueFrom: "ALT.length == 1 || SAMPLES[0].GT.indexOf(String(ALT.length)) == -1"
+  - prefix: "--javascript"
+    valueFrom: $(inputs.filterjs)
diff --git a/cwl/preprocess/nonrefvcf/getfiles.cwl b/cwl/preprocess/nonrefvcf/getfiles.cwl
new file mode 100644 (file)
index 0000000..60dd213
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool: select the entries of `dir` whose extension is ".gz" and
+# return them, in listing order, as the `vcfs` array.
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs to process
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfs:
+    type: File[]
+    label: Output VCFs
+expression: |
+  ${
+    // Keep only the listing entries that end in ".gz".
+    var selected = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".gz";
+    });
+    return {"vcfs": selected};
+  }
diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml
new file mode 100644 (file)
index 0000000..b2ee0dd
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:23c2bc9a1a35af43809dd96bf5a70f36+1061196/filtered_HG00403.haplotypeCalls.er.raw.vcf.gz
diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml
new file mode 100644 (file)
index 0000000..da9ad5c
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:23c2bc9a1a35af43809dd96bf5a70f36+1061196
diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml
new file mode 100644 (file)
index 0000000..e90419b
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:e0ca3616f52f921a1e1cfc4952935a5e+588124
diff --git a/cwl/preprocess/portablevcf/bcftools-annotate.cwl b/cwl/preprocess/portablevcf/bcftools-annotate.cwl
new file mode 100644 (file)
index 0000000..4c22378
--- /dev/null
@@ -0,0 +1,38 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: runs
+#   bcftools annotate -x <tag list> <vcfgz> -O z -o <basename of vcfgz>
+#     && tabix -p vcf <basename of vcfgz>
+# and collects the file matching "*vcf.gz" together with its ".tbi".
+cwlVersion: v1.0
+class: CommandLineTool
+label: Remove unused annotations
+requirements:
+  # Needed for the unquoted "&&" argument below.
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+inputs:
+  vcfgz:
+    type: File
+    label: Input VCF
+outputs:
+  annotatedvcfgz:
+    type: File
+    label: Annotated VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: [bcftools, annotate]
+arguments:
+  # Comma-separated INFO/FORMAT tags passed to -x.
+  # NOTE(review): INFO/ADP is listed twice -- confirm whether the second
+  # occurrence was meant to be a different tag.
+  - prefix: "-x"
+    valueFrom: "INFO/customer_score1,INFO/customer_score2,INFO/ADP,INFO/ADP,INFO/HET,INFO/HOM,INFO/NC,INFO/WT,FORMAT/AO,FORMAT/GL,FORMAT/QA,FORMAT/SDP,FORMAT/RD,FORMAT/AD,FORMAT/FREQ,FORMAT/PVAL,FORMAT/RBQ,FORMAT/ABQ,FORMAT/RDF,FORMAT/RDR,FORMAT/ADF,FORMAT/ADR"
+  - $(inputs.vcfgz)
+  - prefix: "-O"
+    valueFrom: "z"
+  # The output reuses the input's basename in the working directory.
+  - prefix: "-o"
+    valueFrom: $(inputs.vcfgz.basename)
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.vcfgz.basename)
diff --git a/cwl/preprocess/portablevcf/bcftools-reheader.cwl b/cwl/preprocess/portablevcf/bcftools-reheader.cwl
new file mode 100644 (file)
index 0000000..b25478a
--- /dev/null
@@ -0,0 +1,30 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: runs
+#   bcftools reheader -h <header> <vcfgz> -o <basename of vcfgz>
+# and collects the file matching "*vcf.gz". No ".tbi" is declared here.
+cwlVersion: v1.0
+class: CommandLineTool
+label: Change the header of VCF
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+inputs:
+  header:
+    type: File
+    label: Header file
+  vcfgz:
+    type: File
+    label: Input VCF
+outputs:
+  reheaderedvcfgz:
+    type: File
+    label: Reheadered VCF
+    outputBinding:
+      glob: "*vcf.gz"
+baseCommand: [bcftools, reheader]
+arguments:
+  - prefix: "-h"
+    valueFrom: $(inputs.header)
+  - $(inputs.vcfgz)
+  # The output reuses the input's basename in the working directory.
+  - prefix: "-o"
+    valueFrom: $(inputs.vcfgz.basename)
diff --git a/cwl/preprocess/portablevcf/cat.cwl b/cwl/preprocess/portablevcf/cat.cwl
new file mode 100644 (file)
index 0000000..5db8321
--- /dev/null
@@ -0,0 +1,22 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Command line tool: runs `cat` with `txts` as its argument and captures
+# standard output into catsummary.txt, which becomes the `cattxt` output.
+cwlVersion: v1.0
+class: CommandLineTool
+label: Concatenate files
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+inputs:
+  txts:
+    type: File[]
+    label: Text files
+outputs:
+  # `stdout` type: the file named by the top-level `stdout` field below.
+  cattxt:
+    type: stdout
+    label: Concatenated text
+baseCommand: cat
+arguments:
+  - $(inputs.txts)
+stdout: catsummary.txt
diff --git a/cwl/preprocess/portablevcf/getfiles.cwl b/cwl/preprocess/portablevcf/getfiles.cwl
new file mode 100644 (file)
index 0000000..04b48a2
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Expression tool: select the entries of `dir` whose extension is ".gz" and
+# return them, in listing order, as the `vcfgzs` array.
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs to process
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir:
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfgzs:
+    type: File[]
+    label: Output VCFs
+expression: |
+  ${
+    // Keep only the listing entries that end in ".gz".
+    var selected = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".gz";
+    });
+    return {"vcfgzs": selected};
+  }
diff --git a/cwl/preprocess/portablevcf/header b/cwl/preprocess/portablevcf/header
new file mode 100644 (file)
index 0000000..cd010b3
--- /dev/null
@@ -0,0 +1,104 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=/tmp/crunch-job-task-work/compute85.1/tmpdir/tmphHPL2y/stgdaa8fa7e-a7e3-431f-8adb-c23c092f628c/hg19.fa
+##phasing=none
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
+##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">
+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">
+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">
+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">
+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">
+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">
+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele">
+##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele">
+##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio.  Ratio between depth in samples with each called alternate allele and those without.">
+##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
+##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout.">
+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
+##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
+##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
+##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
+##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##INFO=<ID=SF,Number=.,Type=String,Description="Source File (index to sourceFiles, f when filtered)">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=customer_score1,Number=A,Type=String,Description="Customer score 1">
+##INFO=<ID=customer_score2,Number=A,Type=String,Description="Customer score 2">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+##contig=<ID=chr1>
+##contig=<ID=chr2>
+##contig=<ID=chr3>
+##contig=<ID=chr4>
+##contig=<ID=chr5>
+##contig=<ID=chr6>
+##contig=<ID=chr7>
+##contig=<ID=chr8>
+##contig=<ID=chr9>
+##contig=<ID=chr10>
+##contig=<ID=chr11>
+##contig=<ID=chr12>
+##contig=<ID=chr13>
+##contig=<ID=chr14>
+##contig=<ID=chr15>
+##contig=<ID=chr16>
+##contig=<ID=chr17>
+##contig=<ID=chr18>
+##contig=<ID=chr19>
+##contig=<ID=chr20>
+##contig=<ID=chr21>
+##contig=<ID=chr22>
+##contig=<ID=chrM>
+##contig=<ID=chrX>
+##contig=<ID=chrY>
+#CHROM POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  freebayes
diff --git a/cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl b/cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl
new file mode 100644 (file)
index 0000000..c808799
--- /dev/null
@@ -0,0 +1,69 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: Workflow
+label: Preprocess portable VCF
+requirements:
+  arv:RunInSingleContainer: {}  # Arvados extension; NOTE(review): presumably runs all steps in one container - confirm in the Arvados CWL extension docs
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 12000
+
+inputs:
+  vcfgz:  # consumed by the bcftools-reheader step
+    type: File
+    label: Input VCF
+  header:  # consumed by the bcftools-reheader step
+    type: File
+    label: Header file
+  sdf:  # consumed by the rtg-vcfeval step
+    type: Directory
+    label: RTG reference directory
+  cleanvcf:  # consumed by the sort-clean step
+    type: File
+    label: Code that cleans VCFs
+
+outputs:
+  processedvcfgz:
+    type: File
+    label: Processed VCF
+    outputSource: bcftools-annotate/annotatedvcfgz  # result of the last VCF-producing step
+  summary:
+    type: File
+    label: Summary file
+    outputSource: rtg-vcfeval/summary
+
+steps:  # chain: bcftools-reheader -> sort-clean -> bcftools-annotate -> rtg-vcfeval
+  bcftools-reheader:
+    run: bcftools-reheader.cwl
+    in:
+      header: header
+      vcfgz: vcfgz
+    out: [reheaderedvcfgz]
+
+  sort-clean:
+    run: sort-clean.cwl
+    in:
+      vcfgz: bcftools-reheader/reheaderedvcfgz
+      cleanvcf: cleanvcf
+    out: [cleanvcfgz]
+
+  bcftools-annotate:
+    run: bcftools-annotate.cwl
+    in:
+      vcfgz: sort-clean/cleanvcfgz
+    out: [annotatedvcfgz]
+
+  rtg-vcfeval:
+    run: rtg-vcfeval.cwl
+    in:
+      baselinevcfgz: bcftools-annotate/annotatedvcfgz  # baseline and calls are the same annotated VCF
+      callsvcfgz: bcftools-annotate/annotatedvcfgz
+      sdf: sdf
+    out: [summary]
diff --git a/cwl/preprocess/portablevcf/rtg-vcfeval.cwl b/cwl/preprocess/portablevcf/rtg-vcfeval.cwl
new file mode 100644 (file)
index 0000000..3244ae1
--- /dev/null
@@ -0,0 +1,38 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: RTG vcfeval to compare VCFs
+hints:
+  DockerRequirement: {dockerPull: vcfutil}
+# Command line:
+#   rtg vcfeval -b <baselinevcfgz> -c <callsvcfgz> -t <sdf> -o eval
+# Only eval/summary.txt, i.e. summary.txt under the -o target, is
+# collected as output.  Both VCF inputs must come with a ".tbi"
+# secondary file.
+baseCommand: [rtg, vcfeval]
+arguments:
+  - {prefix: "-b", valueFrom: $(inputs.baselinevcfgz)}
+  - {prefix: "-c", valueFrom: $(inputs.callsvcfgz)}
+  - {prefix: "-t", valueFrom: $(inputs.sdf)}
+  - {prefix: "-o", valueFrom: "eval"}
+inputs:
+  baselinevcfgz:
+    label: Baseline VCF
+    type: File
+    secondaryFiles: [.tbi]
+  callsvcfgz:
+    label: Calls VCF
+    type: File
+    secondaryFiles: [.tbi]
+  sdf:
+    label: RTG reference directory
+    type: Directory
+outputs:
+  summary:
+    label: Summary file
+    type: File
+    outputBinding:
+      glob: "eval/summary.txt"
diff --git a/cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl b/cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl
new file mode 100644 (file)
index 0000000..845cc9b
--- /dev/null
@@ -0,0 +1,72 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Finds every .gz file in `vcfsdir`, runs preprocess-portablevcf-wf.cwl
+# on each one, and concatenates the per-file summaries into a single
+# text file.
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to process portable VCFs
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Input directory of VCFs
+  header:
+    type: File
+    label: Header file
+    default:
+      class: File
+      location: header
+  sdf:
+    type: Directory
+    label: RTG reference directory
+  cleanvcf:
+    type: File
+    label: Code that cleans VCFs
+    default:
+      class: File
+      location: ../gvcf/src/cleanvcf.py
+
+outputs:
+  processedvcfgzs:
+    type: File[]
+    label: Processed VCFs
+    outputSource: preprocess-portablevcf-wf/processedvcfgz
+  summary:
+    type: File
+    label: Concatenated summaries
+    # Without this output nothing consumes the `cat` step's result,
+    # so the combined summary never reaches the workflow output.
+    outputSource: cat/cattxt
+
+steps:
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfsdir
+    out: [vcfgzs]
+  preprocess-portablevcf-wf:
+    run: preprocess-portablevcf-wf.cwl
+    # One subworkflow run per VCF; header, sdf and cleanvcf are shared.
+    scatter: vcfgz
+    in:
+      vcfgz: getfiles/vcfgzs
+      header: header
+      sdf: sdf
+      cleanvcf: cleanvcf
+    out: [processedvcfgz, summary]
+  cat:
+    run: cat.cwl
+    # Joins the scattered summary files into catsummary.txt.
+    in:
+      txts: preprocess-portablevcf-wf/summary
+    out: [cattxt]
diff --git a/cwl/preprocess/portablevcf/sort-clean.cwl b/cwl/preprocess/portablevcf/sort-clean.cwl
new file mode 100644 (file)
index 0000000..3d8fbea
--- /dev/null
@@ -0,0 +1,46 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: Sort VCF and clean duplicate calls
+requirements:
+  ShellCommandRequirement: {}  # needed for the unquoted |, > and && tokens in `arguments`
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+inputs:
+  cleanvcf:
+    type: File
+    label: Code that cleans VCFs
+  vcfgz:
+    type: File
+    label: Input VCF
+outputs:
+  cleanvcfgz:
+    type: File
+    label: Clean VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: vcf-sort  # full command: vcf-sort -c VCFGZ | CLEANVCF | bgzip -c > BASENAME && tabix -p vcf BASENAME
+arguments:
+  - "-c"
+  - $(inputs.vcfgz)
+  - shellQuote: False
+    valueFrom: "|"
+  - $(inputs.cleanvcf)  # the script path itself is the pipeline stage; NOTE(review): presumably needs to be executable - confirm
+  - shellQuote: False
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.vcfgz.basename)  # output file takes the input's file name
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.vcfgz.basename)  # index the file written above
diff --git a/cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml b/cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml
new file mode 100644 (file)
index 0000000..1a15c7d
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfgz:
+  class: File
+  location: keep:b0571bd9751df8769ec145289f939685+479/hu007B82_1YB27IM-portable.vcf.gz
diff --git a/cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml b/cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml
new file mode 100644 (file)
index 0000000..e736c83
--- /dev/null
@@ -0,0 +1,16 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfgz:
+  class: File
+  location: keep:9f88a818cbc5b7b6614749c5508226d3+37493/hu007B82_1YB27IM-portable.vcf.gz
+sdf:
+  class: Directory
+  location: keep:6cf48f302f6615f44002c6df07852778+1240
+header:
+  class: File
+  location: ../header
+cleanvcf:
+  class: File
+  location: ../../gvcf/src/cleanvcf.py
diff --git a/cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml b/cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml
new file mode 100644 (file)
index 0000000..0b57cd4
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:
+  class: Directory
+  location: keep:9f88a818cbc5b7b6614749c5508226d3+37493
+sdf:
+  class: Directory
+  location: keep:6cf48f302f6615f44002c6df07852778+1240
diff --git a/cwl/preprocess/portablevcf/yml/sort-clean-test.yml b/cwl/preprocess/portablevcf/yml/sort-clean-test.yml
new file mode 100644 (file)
index 0000000..b68d924
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfgz:  # job input for ../sort-clean.cwl (this file is an input object, not a CWL document)
+  class: File
+  location: keep:9f88a818cbc5b7b6614749c5508226d3+37493/hu007B82_1YB27IM-portable.vcf.gz
+cleanvcf:
+  class: File
+  location: ../../gvcf/src/cleanvcf.py
diff --git a/cwl/preprocess/simons/filter-vcf.cwl b/cwl/preprocess/simons/filter-vcf.cwl
new file mode 100644 (file)
index 0000000..d155ca5
--- /dev/null
@@ -0,0 +1,53 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Filters VCF by a specified QUAL and GQ cutoff
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+  ShellCommandRequirement: {}  # needed for the unquoted && in `arguments`
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  sample:  # also used as the stem of the output file name
+    type: string
+    label: Sample name of VCF
+  qualcutoff:
+    type: int
+    label: Filtering QUAL cutoff
+  gqcutoff:
+    type: int
+    label: Filtering GQ cutoff
+outputs:
+  filteredvcf:
+    type: File
+    label: Filtered VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: [bcftools, view]  # full command: bcftools view -Oz -o SAMPLE.vcf.gz -e EXPR VCF && tabix -p vcf SAMPLE.vcf.gz
+arguments:
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample).vcf.gz
+  - prefix: "-e"
+    valueFrom: "QUAL<$(inputs.qualcutoff) | QUAL='.' | FORMAT/GQ<$(inputs.gqcutoff)"  # -e excludes records matching any of the three conditions
+  - $(inputs.vcf)
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.sample).vcf.gz  # index the file written by bcftools view above
diff --git a/cwl/preprocess/simons/getfiles.cwl b/cwl/preprocess/simons/getfiles.cwl
new file mode 100644 (file)
index 0000000..7f1ba16
--- /dev/null
@@ -0,0 +1,34 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs and sample names
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir:
+    label: Input directory of VCFs
+    type: Directory
+outputs:
+  vcfs:
+    label: Output VCFs
+    type: File[]
+  samples:
+    label: Sample names of VCFs
+    type: string[]
+# `vcfs` holds the entries of dir.listing whose extension (nameext)
+# is ".gz", in listing order.  `samples` holds, at the same index,
+# the part of each kept file's basename that precedes its first ".".
+# The two arrays therefore always have the same length.
+expression: |
+  ${
+    var gzipped = inputs.dir.listing.filter(function (entry) {
+      return entry.nameext == ".gz";
+    });
+    var names = gzipped.map(function (entry) {
+      return entry.basename.split(".")[0];
+    });
+    return {"vcfs": gzipped, "samples": names};
+  }
diff --git a/cwl/preprocess/simons/make-bed.cwl b/cwl/preprocess/simons/make-bed.cwl
new file mode 100644 (file)
index 0000000..6d688e5
--- /dev/null
@@ -0,0 +1,58 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Make BED from VCF for regions passing a specified QUAL and GQ cutoff
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 22000
+  ShellCommandRequirement: {}  # needed for the unquoted | tokens in `arguments`
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  sample:  # used as the stem of the output file name (see `stdout`)
+    type: string
+    label: Sample name of VCF
+  qualcutoff:
+    type: int
+    label: Filtering QUAL cutoff
+  gqcutoff:
+    type: int
+    label: Filtering GQ cutoff
+outputs:
+  bed:
+    type: stdout
+    label: BED for regions that pass cutoff
+baseCommand: [bcftools, view]  # full command: bcftools view -e EXPR VCF | convert2bed -i vcf -d | cut -f1-3 | bedtools merge -i -
+arguments:
+  - prefix: "-e"
+    valueFrom: "QUAL<$(inputs.qualcutoff) | QUAL='.' | FORMAT/GQ<$(inputs.gqcutoff)"  # -e excludes records matching any of the three conditions
+  - $(inputs.vcf)
+  - shellQuote: false
+    valueFrom: "|"
+  - "convert2bed"
+  - prefix: "-i"
+    valueFrom: "vcf"
+  - "-d"
+  - shellQuote: false
+    valueFrom: "|"
+  - "cut"
+  - "-f1-3"  # keep only the first three columns
+  - shellQuote: false
+    valueFrom: "|"
+  - "bedtools"
+  - "merge"
+  - prefix: "-i"
+    valueFrom: "-"  # read the intervals from the pipe
+stdout: $(inputs.sample).bed
diff --git a/cwl/preprocess/simons/make-vcf-bed-wf.cwl b/cwl/preprocess/simons/make-vcf-bed-wf.cwl
new file mode 100644 (file)
index 0000000..d14f17f
--- /dev/null
@@ -0,0 +1,64 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to filter VCF and make BED region
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  variantsvcfdir:  # source of the VCFs passed to filter-vcf
+    type: Directory
+    label: Input variants only VCF directory
+  fullvcfdir:  # source of the VCFs passed to make-bed
+    type: Directory
+    label: Input full VCF directory
+  qualcutoff:  # shared by filter-vcf and make-bed
+    type: int
+    label: Filtering QUAL cutoff
+  gqcutoff:  # shared by filter-vcf and make-bed
+    type: int
+    label: Filtering GQ cutoff
+
+outputs:
+  filteredvcfs:
+    type: File[]
+    label: Output VCFs
+    outputSource: filter-vcf/filteredvcf
+  beds:
+    type: File[]
+    label: Output BEDs
+    outputSource: make-bed/bed
+
+steps:
+  getvariantsvcfs:  # same tool as getfullvcfs, applied to variantsvcfdir
+    run: getfiles.cwl
+    in:
+      dir: variantsvcfdir
+    out: [vcfs, samples]
+  getfullvcfs:  # same tool as getvariantsvcfs, applied to fullvcfdir
+    run: getfiles.cwl
+    in:
+      dir: fullvcfdir
+    out: [vcfs, samples]
+  filter-vcf:
+    run: filter-vcf.cwl
+    scatter: [vcf, sample]
+    scatterMethod: dotproduct  # pairs vcf[i] with sample[i]
+    in:
+      vcf: getvariantsvcfs/vcfs
+      sample: getvariantsvcfs/samples
+      qualcutoff: qualcutoff
+      gqcutoff: gqcutoff
+    out: [filteredvcf]
+  make-bed:
+    run: make-bed.cwl
+    scatter: [vcf, sample]
+    scatterMethod: dotproduct  # pairs vcf[i] with sample[i]
+    in:
+      vcf: getfullvcfs/vcfs
+      sample: getfullvcfs/samples
+      qualcutoff: qualcutoff
+      gqcutoff: gqcutoff
+    out: [bed]
diff --git a/cwl/preprocess/simons/yml/filter-vcf-test.yml b/cwl/preprocess/simons/yml/filter-vcf-test.yml
new file mode 100644 (file)
index 0000000..c944e85
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:c43692b61030dc10d24d84ffc97ff888+157254/LP6005441-DNA_A01.annotated.nh2.variants.vcf.gz
+sample: "LP6005441-DNA_A01"
+qualcutoff: 20
+gqcutoff: 20
diff --git a/cwl/preprocess/simons/yml/make-bed-test.yml b/cwl/preprocess/simons/yml/make-bed-test.yml
new file mode 100644 (file)
index 0000000..1e21beb
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcf:
+  class: File
+  location: keep:8452af084a85eb3b49ab5d77137b7e0a+4903477/LP6005441-DNA_A01.annotated.nh.vcf.gz
+sample: "LP6005441-DNA_A01"
+qualcutoff: 20
+gqcutoff: 20
diff --git a/cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml b/cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml
new file mode 100644 (file)
index 0000000..161bf8f
--- /dev/null
@@ -0,0 +1,12 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+variantsvcfdir:
+  class: Directory
+  location: keep:c43692b61030dc10d24d84ffc97ff888+157254
+fullvcfdir:
+  class: Directory
+  location: keep:8452af084a85eb3b49ab5d77137b7e0a+4903477
+qualcutoff: 20
+gqcutoff: 20
diff --git a/cwl/preprocess/splitvcf/concatvcf-wf.cwl b/cwl/preprocess/splitvcf/concatvcf-wf.cwl
new file mode 100644 (file)
index 0000000..3bffa7f
--- /dev/null
@@ -0,0 +1,33 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: Workflow
+label: Concatenate a set of VCFs split by chromosomes
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  cwltool:LoadListingRequirement: {loadListing: shallow_listing}
+inputs:
+  vcfdirs:
+    label: Input VCFs directories
+    type: Directory[]
+
+# One concatvcf.cwl run per entry of `vcfdirs`; the per-directory
+# results are gathered into the `vcfs` output array.
+steps:
+  concatvcf:
+    run: concatvcf.cwl
+    scatter: vcfdir
+    in: {vcfdir: vcfdirs}
+    out: [vcf]
+
+outputs:
+  vcfs:
+    label: Concatenated VCFs
+    type: File[]
+    outputSource: concatvcf/vcf
+    secondaryFiles: [.tbi]
diff --git a/cwl/preprocess/splitvcf/concatvcf.cwl b/cwl/preprocess/splitvcf/concatvcf.cwl
new file mode 100644 (file)
index 0000000..efddfef
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Concatenate VCFs split by chromosomes
+requirements:
+  DockerRequirement: {dockerPull: arvados/l7g}
+  ResourceRequirement: {coresMin: 2, ramMin: 8000}
+hints:
+  arv:RuntimeConstraints: {keep_cache: 4096}
+# Command line: bash <bashscript> <vcfdir>
+# The file matching "*vcf.gz" in the output directory is returned
+# as `vcf`, with a ".tbi" secondary file expected beside it.
+# `bashscript` defaults to src/concatvcf.sh next to this document.
+baseCommand: [bash]
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.vcfdir)
+inputs:
+  bashscript:
+    label: Master script to concatenate VCFs
+    type: File
+    default:
+      class: File
+      location: src/concatvcf.sh
+  vcfdir:
+    label: Input VCFs directory
+    type: Directory
+outputs:
+  vcf:
+    label: Concatenated VCF
+    type: File
+    secondaryFiles: [.tbi]
+    outputBinding:
+      glob: "*vcf.gz"
diff --git a/cwl/preprocess/splitvcf/src/concatvcf.sh b/cwl/preprocess/splitvcf/src/concatvcf.sh
new file mode 100755 (executable)
index 0000000..57e8150
--- /dev/null
@@ -0,0 +1,20 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+vcfdir="$1"
+
+vcfchr1=`ls $vcfdir/*.chr1.*`
+sample=`basename $vcfchr1 | cut -d '.' -f 1`
+chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM)
+
+files=$(for chrom in ${chroms[@]}; do echo "$vcfdir/$sample.raw_variants.$chrom.g.vcf.gz"; done)
+echo "files: ${files[@]}"
+
+bcftools concat ${files[@]} -n -O z -o $sample.vcf.gz
+tabix $sample.vcf.gz
diff --git a/cwl/preprocess/splitvcf/yml/concatvcf-test.yml b/cwl/preprocess/splitvcf/yml/concatvcf-test.yml
new file mode 100644 (file)
index 0000000..578179c
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfdir:
+  class: Directory
+  location: keep:d24d44b265ae3e38799c4f7301cbf6c3+5241
diff --git a/cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml b/cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml
new file mode 100644 (file)
index 0000000..35eed0f
--- /dev/null
@@ -0,0 +1,461 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Test job input: the list of input directories (`vcfdirs`), each entry a
+# Directory referenced by a keep: locator.
+vcfdirs:
+  - class: Directory
+    location: keep:d24d44b265ae3e38799c4f7301cbf6c3+5241
+  - class: Directory
+    location: keep:4618438a7ceaca560a5cfa1823030667+4485
+  - class: Directory
+    location: keep:ac1e06ab75d3349583f056352b71889b+5539
+  - class: Directory
+    location: keep:ad6fec9bdd7350310803d237219b749d+4136
+  - class: Directory
+    location: keep:0055b1f52ad49c4a22e50c64eaadbc1a+4987
+  - class: Directory
+    location: keep:193fa6830c3a1fc4d96921823241cb2a+5660
+  - class: Directory
+    location: keep:733fae4f0d7d054d02af264c751d18a7+4400
+  - class: Directory
+    location: keep:f4957d5445b1dd29a642dd83adb674b0+4180
+  - class: Directory
+    location: keep:80f588645ab70600e4f5a6ac388d99c7+3493
+  - class: Directory
+    location: keep:6a3f10cc1cf6fe4e75ffa33c89e69a13+5536
+  - class: Directory
+    location: keep:bda45fe62339ca89e52d1291e6567323+3497
+  - class: Directory
+    location: keep:ba0b6ba37ecfec399ac95585ad4ec6ce+4696
+  - class: Directory
+    location: keep:38782fb12161a93e8726d51689e472f8+4779
+  - class: Directory
+    location: keep:65d5701bd4bd5507c5ab8aaaf85b5ba2+4400
+  - class: Directory
+    location: keep:13fcaad3d7d3adb74ebcc0a0e6f027af+4096
+  - class: Directory
+    location: keep:2365ad35f1000ce1fbd4a50fb5fcffb3+6462
+  - class: Directory
+    location: keep:db4c9a379ce7ac19d2d1f1c822e0eeba+3970
+  - class: Directory
+    location: keep:166c4f7b57bfd7cd99ff098c2a3ac52e+5032
+  - class: Directory
+    location: keep:187f854ea63b95995fd3f548fa23a997+4482
+  - class: Directory
+    location: keep:75acc717e6decb8255da3d96cbace070+5328
+  - class: Directory
+    location: keep:fda99bbdf0ac9943811feef5431a57ff+5114
+  - class: Directory
+    location: keep:567fab24ecb57ea41ca8a506479d27b8+6126
+  - class: Directory
+    location: keep:ad3781bb7980f24e9c22382b88f7c813+3586
+  - class: Directory
+    location: keep:8ca5bf1bb1af0af31e922a80158cdf3f+4357
+  - class: Directory
+    location: keep:ea30c0ae929a417580151a1500d75e5f+5451
+  - class: Directory
+    location: keep:b7dae07470bd05aa59bac28f3207640e+5112
+  - class: Directory
+    location: keep:d899dfb98321bf816898f96fc10e78b2+5538
+  - class: Directory
+    location: keep:50fb97d70e0a6f752dd68fd43052c9db+5241
+  - class: Directory
+    location: keep:6141abb4574a7702e7784da17def366c+3674
+  - class: Directory
+    location: keep:632b73968d6d42b238284a3fbd500801+4401
+  - class: Directory
+    location: keep:b93604163adcc1fd6e468f001cc12106+3544
+  - class: Directory
+    location: keep:1f3d41e84cc7e0d8bc0888aae7acc5bf+5453
+  - class: Directory
+    location: keep:6b26815278d0121a3b1c197e4481d320+4444
+  - class: Directory
+    location: keep:695861a88de1590e44d6c672a3df1be2+4013
+  - class: Directory
+    location: keep:8d69c651e900f20bc8ba899d4b245b9b+4866
+  - class: Directory
+    location: keep:ea298b58ea1ed4e94165fe030639ff8c+4184
+  - class: Directory
+    location: keep:a03b834c8898256fd462eda2feb2e83f+6171
+  - class: Directory
+    location: keep:294f4c6c20f3b7f305f2f2585455d596+4014
+  - class: Directory
+    location: keep:f8606ca857ff0a450a0d18a623b1f3b8+5746
+  - class: Directory
+    location: keep:14d0b9b59ea6b2e5828b96101eb7a189+4229
+  - class: Directory
+    location: keep:0f41c29dae36dccf866e0aa0df624223+4733
+  - class: Directory
+    location: keep:ca65d1982e7698639bf8e851de57c95f+6039
+  - class: Directory
+    location: keep:f6143cc3b4a208a8b050358c5075feaa+4267
+  - class: Directory
+    location: keep:86e8014321d6e64f868eb956ee33279d+6040
+  - class: Directory
+    location: keep:5bdc4afcb1b5a48d4a213ef9d14f5b66+5537
+  - class: Directory
+    location: keep:8bbe0f33b1f96cc448b73ea29b13fa05+3970
+  - class: Directory
+    location: keep:ad2619b369001e92c6a8929112a3e802+6463
+  - class: Directory
+    location: keep:594969732a5a787581d84b12415ea6a1+3967
+  - class: Directory
+    location: keep:048a2e05e2128f05b50d8c7e8667cccd+5497
+  - class: Directory
+    location: keep:76e77ceaa3dde8a31c0fd70c4b212ed3+6127
+  - class: Directory
+    location: keep:628cf8d9caf1402cef7d6a746d57e3a0+5829
+  - class: Directory
+    location: keep:d9c637434673f267135aba455075153d+4357
+  - class: Directory
+    location: keep:221c4c7f0fd0d37e72eaf45c164cd783+4265
+  - class: Directory
+    location: keep:912f3aa2f9ee24b189af41732452de48+5498
+  - class: Directory
+    location: keep:0af97b78c139b51d72bfbd6dadbf81c9+4566
+  - class: Directory
+    location: keep:4f740097a65fc1dca17fba44af9da3b0+3844
+  - class: Directory
+    location: keep:354f3af52a35e75412315c3b19d956d8+7220
+  - class: Directory
+    location: keep:36d66871b43209c87e69ea85a8b4c5c9+6084
+  - class: Directory
+    location: keep:830d48b689c3b8582573ca3b9b569500+5412
+  - class: Directory
+    location: keep:60850336a290364e609caa96f72fc70a+5999
+  - class: Directory
+    location: keep:5bca8b3256ed22250242caa1a01611c8+5956
+  - class: Directory
+    location: keep:84de811ea49ad0153db5606a2566ce3b+5071
+  - class: Directory
+    location: keep:f7a3295100c5c5735a23795fbdaf6034+4862
+  - class: Directory
+    location: keep:54b87549459758a414deb9eac38aa841+4522
+  - class: Directory
+    location: keep:641a9ca375fd46970aae5ca06be39041+5114
+  - class: Directory
+    location: keep:16fb96918f11cb9af55b94ba30f741b3+3588
+  - class: Directory
+    location: keep:011aecfb2bf24d5cc628abad09328fd4+3280
+  - class: Directory
+    location: keep:9b0af97106d50e81d577d7782ca6350e+5911
+  - class: Directory
+    location: keep:2537e1fb26425a3bce7c04a33fd618f6+5497
+  - class: Directory
+    location: keep:9cd3e4e95784b22244e6fd4252112e18+4358
+  - class: Directory
+    location: keep:05da96299af677ce5b719bb58dbc6ad6+3592
+  - class: Directory
+    location: keep:c258e1121c65c6a98efdf891f262de55+4736
+  - class: Directory
+    location: keep:1e4a75104b98922d9c9b8f2bc864f148+5866
+  - class: Directory
+    location: keep:e009fcb61ebe9a2f602db344730041d6+5454
+  - class: Directory
+    location: keep:2961d6b5887f3cff5cc9452f200f3b18+5281
+  - class: Directory
+    location: keep:5b45e3c046c74c45a8f868ae5d14bc56+3545
+  - class: Directory
+    location: keep:cbd66172ac3a4756c2bcf76c1d565840+5455
+  - class: Directory
+    location: keep:fb176f2e05f99d0d814e1326fdcc9485+4141
+  - class: Directory
+    location: keep:cf33d92e949057ebea253299f11abeda+5622
+  - class: Directory
+    location: keep:289329596a74adbc9d390a25b404db8f+4315
+  - class: Directory
+    location: keep:c41fd5161e38b30e639502f8df246599+4569
+  - class: Directory
+    location: keep:d23b4ad254c7e0fae82ad78776567089+5327
+  - class: Directory
+    location: keep:d585d21b452860412547931b09338299+5914
+  - class: Directory
+    location: keep:a2d9dd61e935f127afa7605519356c69+5202
+  - class: Directory
+    location: keep:3a50023554d102be1af7a68307777de4+3799
+  - class: Directory
+    location: keep:f994d727ced1c46f50019a3474c0a4f2+3672
+  - class: Directory
+    location: keep:4462d85bb04cdd49d17da80f6bfd776a+5620
+  - class: Directory
+    location: keep:bc5d9debc9c3a8e254d737666075ba6e+4567
+  - class: Directory
+    location: keep:5ec91c7a86862bd77683d92b8c740f45+4611
+  - class: Directory
+    location: keep:3ff29c24ffc69353a7400323f901192f+3459
+  - class: Directory
+    location: keep:db20379681c4f771a2abd95f0e2f787e+5201
+  - class: Directory
+    location: keep:9921901515f0d9c8919535a0397d711d+5622
+  - class: Directory
+    location: keep:47ef8e39657d08ed0bcc9f713d8a2d99+4397
+  - class: Directory
+    location: keep:b262cb419fc6849ab14b73dc16563665+6208
+  - class: Directory
+    location: keep:841fa9252acd6e045e521a6b9997cb06+5409
+  - class: Directory
+    location: keep:7a2417c467f2113fbdbcee7d250390c0+4775
+  - class: Directory
+    location: keep:579960f6698c5044f589b2ea82c07825+5069
+  - class: Directory
+    location: keep:791493d4b469db1fe0157bee6cc909a0+6883
+  - class: Directory
+    location: keep:4622de7734552622979a93af0e5e7c23+5539
+  - class: Directory
+    location: keep:58bb23119f496fa0214e201bce14f106+3842
+  - class: Directory
+    location: keep:fa601c3e3fe58d25b2bfef9bf11dd9fe+5411
+  - class: Directory
+    location: keep:54d53aee94ccfb5503f18760d3183ee0+3502
+  - class: Directory
+    location: keep:40ba7b10f601ad7eaf6c2f4896b7b062+4400
+  - class: Directory
+    location: keep:039eeb9ba43ba05810b63dc5ddbd7d1d+4275
+  - class: Directory
+    location: keep:b6e40ed4a05c3e1ab46dcbc147a902ec+4522
+  - class: Directory
+    location: keep:88f3098d7e4cbaad6c82be519a3c55ea+4860
+  - class: Directory
+    location: keep:31894b9be9d683995b4dcc0e9bfbaa4e+5242
+  - class: Directory
+    location: keep:f3b3d525f4558ec8d9ca5653a8effff1+5158
+  - class: Directory
+    location: keep:48aa2624dabea1afa424e6ef56e415c3+4100
+  - class: Directory
+    location: keep:26b9f40dca9076c986a9e640a6741ff6+5454
+  - class: Directory
+    location: keep:07fa4aa5acc2b9406395219489021eb7+3926
+  - class: Directory
+    location: keep:0e7f2aeee513cd9c60e2830fcd0521e7+4988
+  - class: Directory
+    location: keep:000272da0e36f3ad965f91d3c785f223+4526
+  - class: Directory
+    location: keep:a8603769d5908bf590b3e5e8e0d283fc+5370
+  - class: Directory
+    location: keep:ee31c07013596533cf41497c49b98bea+5387
+  - class: Directory
+    location: keep:db3182ec5412536e88b28913d0a22ada+5893
+  - class: Directory
+    location: keep:c4aa350f6c1c24d39ec1e3f2ee03e319+10402
+  - class: Directory
+    location: keep:3c49124b2fe87575c6fbbb22bd84bc8a+5770
+  - class: Directory
+    location: keep:cedbeb74b2939d66673ea4e9034701c8+13980
+  - class: Directory
+    location: keep:80930b1bbc50cfb710530be3c5a38084+9219
+  - class: Directory
+    location: keep:801ef17053aa91406e853c3e5afec733+6864
+  - class: Directory
+    location: keep:5a6f5b98c27b6449ec61f16ca20c6219+8838
+  - class: Directory
+    location: keep:bf84aed2ad39d7a6a8709e4a786048d3+8879
+  - class: Directory
+    location: keep:fb3f9ba3ae13454c4e3c50735bae1cf5+8624
+  - class: Directory
+    location: keep:80316f42c52b5e1b69a3fb5e0c1f1ddc+5897
+  - class: Directory
+    location: keep:fe389f368a539c800c26c0bc5fb742d7+4286
+  - class: Directory
+    location: keep:0e308f382a914fa7c9206d9e4cc77921+3381
+  - class: Directory
+    location: keep:3ddf70e354c0dd984375dabbf28fc59a+6354
+  - class: Directory
+    location: keep:b5d10ce0c73cabfbac498fb78eecfc06+5133
+  - class: Directory
+    location: keep:e8fa3dfbda584f7bf628c98210e834e7+6354
+  - class: Directory
+    location: keep:0e7c7081e6134d0d6506e05aa2ec2156+6017
+  - class: Directory
+    location: keep:40465d3bf5da56a3251d43157f72280e+6059
+  - class: Directory
+    location: keep:14439d0abe42d802818b2217f0d6b2ab+6484
+  - class: Directory
+    location: keep:badebc224945d0e9184dd0cb4cfae8e4+12086
+  - class: Directory
+    location: keep:118dc90f74a9b2f5123b691469b188c3+10610
+  - class: Directory
+    location: keep:460714ee63d216b0d3e31e7df4d56ef1+9133
+  - class: Directory
+    location: keep:950f777c77cc3297193a1589f7369907+5645
+  - class: Directory
+    location: keep:5e92c5473fb4082c766bc8e6a6d565ff+5723
+  - class: Directory
+    location: keep:1a878e042a6589dcd2cbf7f5871ad59e+9518
+  - class: Directory
+    location: keep:300d5cba22f4a4ea23070e754f3dcbe5+8414
+  - class: Directory
+    location: keep:99c05547b54571271c0247c875273d5a+8127
+  - class: Directory
+    location: keep:f99173d4ea915f8a555c8573af29db28+9095
+  - class: Directory
+    location: keep:fa049a5fca2922b63f99d18f31f8e4e8+6330
+  - class: Directory
+    location: keep:3fbe7fd8fa060ac932967413696b9c81+9447
+  - class: Directory
+    location: keep:a26bd7a31ff75026d40a2da60ae419bd+6458
+  - class: Directory
+    location: keep:000eab0395e66bc3713918779c23279f+8692
+  - class: Directory
+    location: keep:c8f8a9e80405cbd66db3bae9e54f1a13+7720
+  - class: Directory
+    location: keep:3da0c715c14e6aa7ca42c07b889282ea+5533
+  - class: Directory
+    location: keep:20e265c6d928d52753cc63c9d7f78cb4+5558
+  - class: Directory
+    location: keep:7825900466a2fd915996e85563d143c4+5236
+  - class: Directory
+    location: keep:031b8fb70b62a432fcb64ff131615f7e+6270
+  - class: Directory
+    location: keep:6fba515068ce39cfd380ab84a8944527+6020
+  - class: Directory
+    location: keep:6d293abe59b879411bb9078d10fb56ae+6064
+  - class: Directory
+    location: keep:ee1a1b528312c32a279f69a8e650fb66+5979
+  - class: Directory
+    location: keep:b8f88a4c961f45ad8abbf69a94df2c18+6229
+  - class: Directory
+    location: keep:c22ea1ed97e753c1c7c9c7f5cdc67b0a+6821
+  - class: Directory
+    location: keep:5fe068b8da7169c44f268136e3149cb6+5603
+  - class: Directory
+    location: keep:8e5e40c387c49513a08e94f295a32a62+5753
+  - class: Directory
+    location: keep:3ee5a66df9c62c70f02562fcee7318a2+5964
+  - class: Directory
+    location: keep:3f575d430e959a7c17511baf70fb2eeb+7598
+  - class: Directory
+    location: keep:29663929ce1833893627e5cb60567054+5960
+  - class: Directory
+    location: keep:6d04859c4ba2394a2a9168c7858292c9+8063
+  - class: Directory
+    location: keep:a84b2c7921fb9388272b45d309fdd9e9+6273
+  - class: Directory
+    location: keep:b418c733542f662e0a9e803b62869c99+6313
+  - class: Directory
+    location: keep:019ab93b84966cc79ab08e33b27a80bd+6526
+  - class: Directory
+    location: keep:d24e2eb9b20a2576513923571c64ccd8+6360
+  - class: Directory
+    location: keep:5ad8f70496af453d89c14188521b4fc7+6610
+  - class: Directory
+    location: keep:47721d4b0594ab33fcb506bdc3db0598+6385
+  - class: Directory
+    location: keep:51a06e0f5729cb4c107f55d6c8529a63+5602
+  - class: Directory
+    location: keep:f04b354ff7b9ca5d384f3a1a62cf71d9+5603
+  - class: Directory
+    location: keep:fbe0344b41bb5d7e4cc1d2c3979bc928+5625
+  - class: Directory
+    location: keep:f9c7197f69cecc0e61a64f1a4b0852e2+5770
+  - class: Directory
+    location: keep:4aa25af9a26023744d7ea569b47c7410+6569
+  - class: Directory
+    location: keep:080f588bf847f736afff2f9ed0ae2322+4755
+  - class: Directory
+    location: keep:d3fd12f7113d5f8f98c022e35c39ca37+5465
+  - class: Directory
+    location: keep:732ada30026c0cda98767d0de1cb471f+5599
+  - class: Directory
+    location: keep:a5f5110ff8d02a65ddcdceadaa2d2e70+6101
+  - class: Directory
+    location: keep:0343282034802ee2ce544e802326c94a+6904
+  - class: Directory
+    location: keep:4cb76d00b852b59f2a8b02097ae681de+6821
+  - class: Directory
+    location: keep:aa7389dd1210cce54963946ef62254c1+5343
+  - class: Directory
+    location: keep:92945c6d1adaad79b9b49b6753c83c0a+6486
+  - class: Directory
+    location: keep:000b0c2f2178a87122d7d4a457e078aa+7748
+  - class: Directory
+    location: keep:28904a543f55f7b2cb74776a8ebbdd4e+6484
+  - class: Directory
+    location: keep:3cc1a31b2091917d957e7bffe13fcd9a+6526
+  - class: Directory
+    location: keep:d83e65bb86859184e5f9faf27f3faed1+8711
+  - class: Directory
+    location: keep:93aaae32487a543c463d326b3fc97273+6695
+  - class: Directory
+    location: keep:61b6750dcee20583e203995b834d038a+6738
+  - class: Directory
+    location: keep:84ad1c8dd9a579552ba716ac55bf5358+5938
+  - class: Directory
+    location: keep:c2f1f15d4979f594246d59988b388e90+5343
+  - class: Directory
+    location: keep:40b236d5a6c08cd99449fd3d448cd34f+5219
+  - class: Directory
+    location: keep:e99ad05c03b4ca78056def975357c33e+7357
+  - class: Directory
+    location: keep:f33adc2512543b97b9ab83c8817b25b8+6382
+  - class: Directory
+    location: keep:8f9965977d52671e7fb3f7b04c6688a0+6818
+  - class: Directory
+    location: keep:eeb6989418ad1779ed2ed6bfce61b56d+6483
+  - class: Directory
+    location: keep:50abc3635341fd2dc74cf1ac82717e86+6018
+  - class: Directory
+    location: keep:179334d433abd2cf712a6ddf928e7dae+6062
+  - class: Directory
+    location: keep:5256caf06d5d611dc89d6d45b19c961c+5851
+  - class: Directory
+    location: keep:05a0664d1a252cd58bc73839c71ff3b5+6147
+  - class: Directory
+    location: keep:3b1bb7557c5dca8ab4081701c93eab02+6733
+  - class: Directory
+    location: keep:5200db4989f48b68ac91436dfa3b70e3+6019
+  - class: Directory
+    location: keep:57f2909df7fe65a13d97a714dbac60e8+6438
+  - class: Directory
+    location: keep:58c6974d0178c4e017ed194791e8e63d+7111
+  - class: Directory
+    location: keep:27bd5b627a26014528b3b5a9e4f8973c+5597
+  - class: Directory
+    location: keep:b82473f5704b7c15b1d43aaabadba8d6+6271
+  - class: Directory
+    location: keep:cee144cd698a64b65a5439b7f423cfaf+6693
+  - class: Directory
+    location: keep:eb01a678af872384129e2a9f081a668f+5345
+  - class: Directory
+    location: keep:cda369ef18201bfdc7817ed1be13d0b6+6146
+  - class: Directory
+    location: keep:bb302fccf71c54665b240e105e166424+4285
+  - class: Directory
+    location: keep:7d27bc720f96d96e00695aee5cb96fc6+5937
+  - class: Directory
+    location: keep:30ecf50e1c96904e64077038fef7345f+6314
+  - class: Directory
+    location: keep:5027177bf4cd967718770a27e49aef7c+7792
+  - class: Directory
+    location: keep:9c441a1c11d7dcca710b35d8e9b9f191+3039
+  - class: Directory
+    location: keep:dcc047dfb8b5801d3057799576d26543+9427
+  - class: Directory
+    location: keep:a47eb87943c0f6b431c30214720f5353+6736
+  - class: Directory
+    location: keep:98f7b440486b2bd467a6fa244e5a1b21+9336
+  - class: Directory
+    location: keep:5eac8f5f4f204f3717af3ffe18499417+7576
+  - class: Directory
+    location: keep:5fa71e545b03d751f43efa83ae256177+6821
+  - class: Directory
+    location: keep:9b70322df1f1c512f29482e65c3023e1+5815
+  - class: Directory
+    location: keep:5eec0be3c3953751bce308157b368761+7492
+  - class: Directory
+    location: keep:d9bba0ab3740c8b3ce1e12cf1368ec74+6820
+  - class: Directory
+    location: keep:a252d847cf48aaf8bc5838440c884b9b+8293
+  - class: Directory
+    location: keep:ea4916d4a77bf641917403d0d27ff5f1+8126
+  - class: Directory
+    location: keep:18354a5104483120d3df163d41749e00+7620
+  - class: Directory
+    location: keep:879c142801965bcf44ae655f5882fd02+8952
+  - class: Directory
+    location: keep:576bcb8f7e6f9d2d60030c2478757f3c+6126
+  - class: Directory
+    location: keep:761ea5d6cd141f044b986926e6d012d2+7453
+  - class: Directory
+    location: keep:d37bb67c57dbc5588724c0cc66fc639d+10018
+  - class: Directory
+    location: keep:a815b0140c1aadeb1066bd7961fcea3e+5935
diff --git a/cwl/preprocess/vcfbed/get-vcfbed.cwl b/cwl/preprocess/vcfbed/get-vcfbed.cwl
new file mode 100644 (file)
index 0000000..e7f090b
--- /dev/null
@@ -0,0 +1,62 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: deep_listing
+cwlVersion: v1.0
+class: ExpressionTool
+label: Scatter over directory to pair VCF, BED and index files
+# Walks the top-level entries of vcfsdir and builds three parallel arrays
+# (same length, same order): one entry per file whose extension is ".gz".
+#   vcfs[i]     - the compressed file, with its index attached when found
+#   beds[i]     - the shared bedfile input if given, else <sample>.bed
+#   outnames[i] - "<sample>.vcf.gz"
+# <sample> is the part of the file name before the first dot.
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Directory containing compressed VCF, BED, and index files for processing
+  bedfile:
+    type: File?
+    label: Optional BED to scatter over if not included in vcfsdir
+outputs:
+  vcfs:
+    type: File[]
+    label: Array of compressed VCF files from input directory
+    secondaryFiles: [.tbi]
+  beds:
+    type: File[]
+    label: Array of BED files from input directory
+  outnames:
+    type: string[]
+    label: Array of file names to maintain naming convention for gVCF conversion
+expression: |
+  ${
+    var listing = inputs.vcfsdir.listing;
+    var vcfs = [];
+    var beds = [];
+    var outnames = [];
+
+    for (var i = 0; i < listing.length; i++) {
+      var main = listing[i];
+      // NOTE(review): every ".gz" entry is treated as a VCF - confirm that
+      // vcfsdir never holds other gzip'd files.
+      if (main.nameext != '.gz') {
+        continue;
+      }
+      var baseName = main.nameroot.split(".")[0];
+
+      // Reset the BED for every VCF so that a BED matched for one sample can
+      // never be reused for a later sample that has none.
+      var bed = inputs.bedfile ? inputs.bedfile : null;
+
+      for (var j = 0; j < listing.length; j++) {
+        var candidate = listing[j];
+        // Accept both "<sample>.tbi" and "<file>.tbi" (the name implied by
+        // the ".tbi" secondaryFiles pattern on the vcfs output).
+        if (candidate.basename == baseName+".tbi" ||
+            candidate.basename == main.basename+".tbi") {
+          main.secondaryFiles = [candidate];
+        } else if (!inputs.bedfile && candidate.basename == baseName+".bed") {
+          bed = candidate;
+        }
+      }
+
+      if (!bed) {
+        throw new Error("No BED file found for " + main.basename +
+                        ": expected " + baseName + ".bed in vcfsdir or a bedfile input");
+      }
+
+      vcfs.push(main);
+      beds.push(bed);
+      outnames.push(baseName+'.vcf.gz');
+    }
+    return {"vcfs": vcfs, "beds": beds, "outnames": outnames};
+  }
diff --git a/cwl/preprocess/vcfbed/intersect-vcfbed.cwl b/cwl/preprocess/vcfbed/intersect-vcfbed.cwl
new file mode 100644 (file)
index 0000000..b7e3501
--- /dev/null
@@ -0,0 +1,52 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Intersect VCF and BED
+requirements:
+  ShellCommandRequirement: {}
+  DockerRequirement:
+    dockerPull: l7g/preprocess-vcfbed
+  ResourceRequirement:
+    ramMin: 12000
+inputs:
+  vcf:
+    type: File
+    label: VCF to be intersected
+  bed:
+    type: File
+    label: BED to intersect with VCF
+outputs:
+  intersectedvcf:
+    type: File
+    label: Intersected VCF with 100% alignment
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand:
+  - bedtools
+  - intersect
+# Resulting shell command:
+#   bedtools intersect -header -a <vcf> -b <bed> -f 1 \
+#     | bgzip -c > <vcf basename> && tabix -p vcf <vcf basename>
+# The shell operators are passed unquoted; everything else is shell-quoted.
+arguments:
+  - "-header"
+  - "-a"
+  - $(inputs.vcf)
+  - "-b"
+  - $(inputs.bed)
+  - "-f"
+  - "1"
+  - valueFrom: "|"
+    shellQuote: false
+  - "bgzip"
+  - "-c"
+  - valueFrom: ">"
+    shellQuote: false
+  - $(inputs.vcf.basename)
+  - valueFrom: "&&"
+    shellQuote: false
+  - "tabix"
+  - "-p"
+  - "vcf"
+  - $(inputs.vcf.basename)
diff --git a/cwl/preprocess/vcfbed/sort-bed.cwl b/cwl/preprocess/vcfbed/sort-bed.cwl
new file mode 100644 (file)
index 0000000..c2ad861
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Sort BED by natural ordering (1,2,10,M,X)
+requirements:
+  ShellCommandRequirement: {}
+  DockerRequirement:
+    dockerPull: l7g/preprocess-vcfbed
+inputs:
+  bed:
+    type: File
+    label: BED to be sorted by natural ordering
+outputs:
+  sortedbed:
+    type: File
+    label: BED sorted by natural ordering
+    outputBinding:
+      glob: "*.bed"
+baseCommand: sort
+# Resulting shell command:
+#   sort -k1,1V -k2,2n <bed> > <bed basename>
+# The sorted copy keeps the input's file name and lands in the output directory.
+arguments:
+  - "-k1,1V"
+  - "-k2,2n"
+  - $(inputs.bed)
+  - valueFrom: ">"
+    shellQuote: false
+  - $(inputs.bed.basename)
\ No newline at end of file
diff --git a/cwl/preprocess/vcfbed/sort-vcf.cwl b/cwl/preprocess/vcfbed/sort-vcf.cwl
new file mode 100644 (file)
index 0000000..6050c8f
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Sort VCF by natural ordering (1,2,10,M,X)
+requirements:
+  ShellCommandRequirement: {}
+  DockerRequirement:
+    dockerPull: l7g/preprocess-vcfbed
+inputs:
+  vcf:
+    type: File
+    label: Compressed VCF to be sorted by natural ordering
+outputs:
+  sortedvcf:
+    type: File
+    label: Compressed VCF sorted by natural ordering
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: vcf-sort
+# Resulting shell command:
+#   vcf-sort -c <vcf> | bgzip -c > <vcf basename> && tabix -p vcf <vcf basename>
+# The shell operators are passed unquoted; everything else is shell-quoted.
+arguments:
+  - "-c"
+  - $(inputs.vcf)
+  - valueFrom: "|"
+    shellQuote: false
+  - "bgzip"
+  - "-c"
+  - valueFrom: ">"
+    shellQuote: false
+  - $(inputs.vcf.basename)
+  - valueFrom: "&&"
+    shellQuote: false
+  - "tabix"
+  - "-p"
+  - "vcf"
+  - $(inputs.vcf.basename)
\ No newline at end of file
diff --git a/cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf b/cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf
new file mode 100755 (executable)
index 0000000..0a0716d
--- /dev/null
@@ -0,0 +1,66 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+#
+# vcfbed2homref is expected to be available
+# The VCF file is expected to be indexed and gzip'd
+# The BED file is expected to be uncompressed
+# The reference file is expected to be compressed and indexed
+#
+
+VERBOSE=1
+
+invcf="$1"
+inbed="$2"
+inref="$3"
+outvcf="$4"
+
+if [[ "$invcf" == "" ]] || [[ "$inbed" == "" ]] || [[ "$inref" == "" ]] ; then
+  echo "provide input vcf and bed file"
+  exit 1
+fi
+
+if [[ "$outvcf" == "" ]] ; then
+  outvcf="out.vcf.gz"
+fi
+
+function choosecat {
+  inf="$1"
+
+  ftype=`file -b -i -L "$inf" | cut -f1 -d';'`
+
+  if [[ ! -e "$inf" ]] ; then
+    echo "cat"
+  elif [[ "$ftype" == "application/x-bzip2" ]] ; then
+    echo "bzcat"
+  elif [[ "$ftype" == "application/gzip" ]] ; then
+    echo "zcat"
+  elif [[ "$ftype" == "text/plain" ]] ; then
+    echo "cat"
+  else
+    echo "cat"
+  fi
+}
+vcfcat=`choosecat "$invcf"`
+bedcat=`choosecat "$inbed"`
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "vcfbed2homref -r \"$inref\" -b <( $bedcat \"$inbed\" ) <( $vcfcat \"$invcf\" ) | bgzip -c > \"$outvcf\" "
+fi
+
+vcfbed2homref -r "$inref" -b "$inbed" "$invcf" | \
+  bgzip -c > "$outvcf"
+r=$?
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "### finished conversion, exit code $r"
+  echo "### indexing $outvcf"
+fi
+
+tabix -f "$outvcf"
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "### done"
+fi
\ No newline at end of file
diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl
new file mode 100644 (file)
index 0000000..21c0c07
--- /dev/null
@@ -0,0 +1,49 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: Workflow
+label: Preprocess VCF and BED files to create a collection of gVCF files
+requirements:
+  - class: DockerRequirement
+    dockerPull: l7g/preprocess-vcfbed
+  - class: ScatterFeatureRequirement
+
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Directory of VCF, BED and index files
+  ref:
+    type: File
+    label: Reference FASTA file
+  bedfile:
+    type: File?
+    label: Optional BED to scatter over if not included in vcfsdir
+
+outputs:
+  result:
+    type: File[]
+    label: gVCFs and index files
+    outputSource: vcfbed2gvcf/result
+
+# Two steps: pair up the files found in vcfsdir, then convert each
+# (vcf, bed, outname) triple to a gVCF. No sorting or intersection is done here.
+steps:
+  get-vcfbed:
+    run: get-vcfbed.cwl
+    in:
+      vcfsdir: vcfsdir
+      bedfile: bedfile
+    out:
+      - vcfs
+      - beds
+      - outnames
+  vcfbed2gvcf:
+    run: vcfbed2gvcf.cwl
+    # dotproduct: the i-th vcf, bed and outname are processed together.
+    scatter:
+      - vcf
+      - bed
+      - outname
+    scatterMethod: dotproduct
+    in:
+      vcf: get-vcfbed/vcfs
+      bed: get-vcfbed/beds
+      ref: ref
+      outname: get-vcfbed/outnames
+    out:
+      - result
diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl
new file mode 100644 (file)
index 0000000..974a39a
--- /dev/null
@@ -0,0 +1,72 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: Workflow
+label: Preprocess VCF and BED files to create a collection of gVCF files
+requirements:
+  - class: DockerRequirement
+    dockerPull: l7g/preprocess-vcfbed
+  - class: ResourceRequirement
+    coresMin: 1
+    ramMin: 12000
+  - class: ScatterFeatureRequirement
+
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Directory of VCF, BED and index files
+  ref:
+    type: File
+    label: Reference FASTA file
+  bedfile:
+    type: File?
+    label: Optional BED to scatter over if not included in vcfsdir
+
+outputs:
+  result:
+    type: File[]
+    # The output is an array of files, not a Directory.
+    label: gVCFs and index files
+    outputSource: vcfbed2gvcf/result
+
+# Pipeline: pair files -> sort VCFs and BEDs -> intersect each VCF with its
+# BED -> convert each intersected VCF to a gVCF.
+steps:
+  # Build parallel arrays of VCFs, BEDs and output names from vcfsdir.
+  get-vcfbed:
+    run: get-vcfbed.cwl
+    in:
+      vcfsdir: vcfsdir
+      bedfile: bedfile
+    out: [vcfs, beds, outnames]
+  # Sort every VCF independently.
+  sort-vcf:
+    run: sort-vcf.cwl
+    scatter: vcf
+    in:
+      vcf: get-vcfbed/vcfs
+    out: [sortedvcf]
+  # Sort every BED independently.
+  sort-bed:
+    scatter: bed
+    run: sort-bed.cwl
+    in:
+      bed: get-vcfbed/beds
+    out: [sortedbed]
+  # dotproduct: the i-th sorted VCF is intersected with the i-th sorted BED.
+  intersect-vcfbed:
+    run: intersect-vcfbed.cwl
+    scatter: [vcf, bed]
+    scatterMethod: dotproduct
+    in:
+      vcf: sort-vcf/sortedvcf
+      bed: sort-bed/sortedbed
+    out: [intersectedvcf]
+  # dotproduct: the i-th intersected VCF, sorted BED and outname go together.
+  vcfbed2gvcf:
+    run: vcfbed2gvcf.cwl
+    scatter: [vcf, bed, outname]
+    scatterMethod: dotproduct
+    in:
+      vcf: intersect-vcfbed/intersectedvcf
+      bed: sort-bed/sortedbed
+      ref: ref
+      outname: get-vcfbed/outnames
+    out: [result]
diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf.cwl
new file mode 100644 (file)
index 0000000..ae07886
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Creates gVCF with a given VCF, BED and reference FASTA
+requirements:
+  DockerRequirement:
+    dockerPull: l7g/preprocess-vcfbed
+  ResourceRequirement:
+    coresMin: 1
+# Resulting command line:
+#   bash <script> <vcf> <bed> <ref> <outname>
+baseCommand: bash
+inputs:
+  script:
+    type: File
+    label: Script to run vcfbed2homref, compress and index VCF
+    default:
+      class: File
+      location: src/convert-vcf-bed-to-gvcf
+    inputBinding:
+      position: 1
+  vcf:
+    type: File
+    label: VCF to be converted to gVCF
+    secondaryFiles: [.tbi]
+    inputBinding:
+      position: 2
+  bed:
+    type: File
+    label: BED representing called region of VCF
+    inputBinding:
+      position: 3
+  ref:
+    type: File
+    label: Compressed FASTA reference
+    inputBinding:
+      position: 4
+  outname:
+    type: string
+    label: String to maintain VCF naming convention for gVCF
+    inputBinding:
+      position: 5
+outputs:
+  result:
+    type: File
+    label: Compressed gVCF and index file
+    secondaryFiles: [.tbi]
+    outputBinding:
+      glob: "*.vcf.gz"
diff --git a/cwl/preprocess/vcfbed/yml/single-bed.yml b/cwl/preprocess/vcfbed/yml/single-bed.yml
new file mode 100644 (file)
index 0000000..23966c1
--- /dev/null
@@ -0,0 +1,13 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input that supplies one explicit BED (`bedfile`) together with the
+# input directory (`vcfsdir`) and the reference (`ref`).
+# NOTE(review): presumably intended for the vcfbed2gvcf workflows, whose
+# inputs carry these names - confirm.
+vcfsdir:
+  class: Directory
+  path: keep:890e2842e843ee611c82ffeb033b5f4c+385
+bedfile:
+  class: File
+  path: keep:a33dcf43e857a96eef48d93a76615285+140/hu01F73B_masterVarBeta-GS000037833-ASM.bed
+ref:
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml
new file mode 100644 (file)
index 0000000..7a7eb56
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input: input directory (`vcfsdir`) and reference (`ref`); no `bedfile`
+# is given here.
+vcfsdir:
+  class: Directory
+  path: keep:e3daea88dee2bc69f5ba24f0089c7387+180598
+ref:
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml
new file mode 100644 (file)
index 0000000..bcd25fc
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Job input: input directory (`vcfsdir`) and reference (`ref`); no `bedfile`
+# is given here.
+vcfsdir:
+  class: Directory
+  path: keep:237e82da5f78bd5d8e2640547c984bec+1698
+ref:
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml
new file mode 100644 (file)
index 0000000..10c7a24
--- /dev/null
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:  # Directory input; the keep: reference is presumably an Arvados Keep collection (confirm)
+  class: Directory
+  path: keep:a216d5da3c7ad73b0242e57678f48f56+58474
+ref:  # gzipped FASTA (human_g1k_v37); presumably the reference genome (confirm)
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/docker/beagle5.4/Dockerfile b/docker/beagle5.4/Dockerfile
new file mode 100644 (file)
index 0000000..1165628
--- /dev/null
@@ -0,0 +1,59 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Image providing the Beagle 5.4 and bref3 jars (downloaded into the image's
+# default working directory), OpenJDK 8, and htslib/samtools/bcftools 1.9.
+
+FROM ubuntu:16.04
+LABEL maintainer="Jiayong Li <jli@curii.com>"
+USER root
+
+# Build tools and headers; update and install share one layer so a cached,
+# stale package index is never reused.
+RUN apt-get update --fix-missing -qy && apt-get install -qy wget \
+        build-essential \
+        cmake \
+        zlib1g-dev \
+        libbz2-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        gcc \
+        make \
+        python \
+        git
+
+# Installing OpenJDK 8 and ant
+RUN apt-get update && \
+    apt-get install -y openjdk-8-jdk && \
+    apt-get install -y ant && \
+    apt-get clean
+
+# Fixing certificate issues; -y keeps the non-interactive build from aborting
+RUN apt-get update && \
+    apt-get install -y ca-certificates-java && \
+    apt-get clean && \
+    update-ca-certificates -f
+
+# Getting beagle5.4
+RUN wget https://faculty.washington.edu/browning/beagle/beagle.05May22.33a.jar
+RUN wget https://faculty.washington.edu/browning/beagle/bref3.05May22.33a.jar
+
+# Getting HTSLIB 1.9 for tabix/bgzip
+RUN wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar -xjvf htslib-1.9.tar.bz2 && \
+    cd htslib-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.9
+RUN wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xjvf samtools-1.9.tar.bz2 && \
+    cd samtools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.9
+RUN wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xjvf bcftools-1.9.tar.bz2 && \
+    cd bcftools-1.9 && ./configure && make && make install
+
+WORKDIR /
diff --git a/docker/cgivar2vcfbed/Dockerfile b/docker/cgivar2vcfbed/Dockerfile
new file mode 100644 (file)
index 0000000..eb8ee22
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM arvados/jobs
+LABEL maintainer="Jiayong Li <jli@curii.com>"
+
+USER root
+
+# Update and install in one layer so a stale cached index is never reused.
+RUN apt-get update -q && \
+    apt-get install -qy build-essential wget cmake zlib1g-dev git
+
+# Installing cgatools 1.8.0.1 (prebuilt binary copied into /usr/local/bin)
+
+RUN wget https://sourceforge.net/projects/cgatools/files/1.8.0/cgatools-1.8.0.1-linux_binary-x86_64.tar.gz && \
+    tar -xzvf cgatools-1.8.0.1-linux_binary-x86_64.tar.gz && \
+    cp cgatools-1.8.0.1-linux_binary-x86_64/bin/cgatools /usr/local/bin
+
+WORKDIR /
+
+# Getting HTSLIB 1.3.1 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.3.1/htslib-1.3.1.tar.bz2 && \
+    tar -xjvf htslib-1.3.1.tar.bz2 && \
+    cd htslib-1.3.1 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bedtools v2.25.0 (built from source; only the bedtools binary is copied)
+
+RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.25.0/bedtools-2.25.0.tar.gz && \
+    tar -xzvf bedtools-2.25.0.tar.gz && \
+    cd bedtools2/ && make && cp /bedtools2/bin/bedtools /usr/local/bin
+
+WORKDIR /
+
+# Installing gvcf_regions (cloned into /gvcf_regions, unpinned default branch)
+
+RUN git clone https://github.com/lijiayong/gvcf_regions
diff --git a/docker/lightning/Dockerfile b/docker/lightning/Dockerfile
new file mode 100644 (file)
index 0000000..d920602
--- /dev/null
@@ -0,0 +1,47 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM ubuntu:20.04
+LABEL maintainer="Jiayong Li <jli@curii.com>"
+USER root
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install necessary dependencies. The base is pinned to 20.04 because the
+# python3.8 package below is not available in newer releases such as 22.04.
+# update and install share one layer so a stale cached index is never reused.
+RUN apt-get update --fix-missing -qy && \
+  apt-get install -qy --no-install-recommends wget \
+  build-essential \
+  cmake \
+  zlib1g-dev \
+  libbz2-dev \
+  liblzma-dev \
+  libncurses5-dev \
+  libncursesw5-dev \
+  gcc \
+  make \
+  python3.8 \
+  python3-pip \
+  python3-numpy \
+  python3-pandas \
+  python3-scipy \
+  python3-matplotlib \
+  git
+
+# "sklearn" on PyPI is a deprecated alias; the real package is scikit-learn.
+RUN pip3 install scikit-learn && pip3 install --upgrade scipy
+
+# Installing go 1.19.3
+
+RUN wget https://go.dev/dl/go1.19.3.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go1.19.3.linux-amd64.tar.gz
+
+ENV PATH=$PATH:/usr/local/go/bin:/root/go/bin
+
+# Getting lightning (unpinned default branch, built with make)
+
+RUN git clone https://github.com/arvados/lightning && \
+    cd lightning && make
+
+WORKDIR /
diff --git a/docker/snpeff/Dockerfile b/docker/snpeff/Dockerfile
new file mode 100644 (file)
index 0000000..e37c3d4
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM ubuntu:18.04
+LABEL maintainer="Jiayong Li <jli@curii.com>"
+USER root
+
+# Install necessary dependencies. update and install share one layer so the
+# install never runs against a stale cached package index.
+RUN apt-get update --fix-missing -qy && \
+    apt-get install -qy wget \
+        build-essential \
+        cmake \
+        zlib1g-dev \
+        libbz2-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        gcc \
+        make \
+        python \
+        python3 \
+        git \
+        default-jdk \
+        unzip
+
+# Getting HTSLIB 1.10.2 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.10.2/htslib-1.10.2.tar.bz2 && tar -xjvf htslib-1.10.2.tar.bz2 && \
+    cd htslib-1.10.2 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.10
+
+RUN wget https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 && tar -xjvf samtools-1.10.tar.bz2 && \
+    cd samtools-1.10 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.10.2
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 && tar -xjvf bcftools-1.10.2.tar.bz2 && \
+    cd bcftools-1.10.2 && ./configure && make && make install
+
+WORKDIR /
+
+# Getting snpEff4.3t over https; the archive is unzipped in place and left in
+# the current working directory.
+
+RUN wget https://sourceforge.net/projects/snpeff/files/snpEff_v4_3t_core.zip && \
+    unzip snpEff_v4_3t_core.zip
+
+WORKDIR /
diff --git a/docker/vcfutil/Dockerfile b/docker/vcfutil/Dockerfile
new file mode 100644 (file)
index 0000000..d3427b7
--- /dev/null
@@ -0,0 +1,66 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM arvados/jobs
+LABEL maintainer="Jiayong Li <jli@curii.com>"
+
+USER root
+
+# Update and install in one layer so a stale cached index is never reused;
+# unzip is installed here too, for the rtg-tools archive further down.
+RUN apt-get update -q && \
+    apt-get install -qy build-essential wget cmake zlib1g-dev \
+    libbz2-dev liblzma-dev libncurses5-dev libncursesw5-dev git vcftools unzip
+
+# Getting HTSLIB 1.9 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar -xjvf htslib-1.9.tar.bz2 && \
+    cd htslib-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.9
+
+RUN wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xjvf samtools-1.9.tar.bz2 && \
+    cd samtools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.9
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xjvf bcftools-1.9.tar.bz2 && \
+    cd bcftools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing rtg tools 3.9.1 (unzip comes from the apt-get install above);
+# the archive is unpacked to /rtg-tools-3.9.1, which is then added to PATH.
+
+RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.9.1/rtg-tools-3.9.1-linux-x64.zip && \
+    unzip rtg-tools-3.9.1-linux-x64.zip && \
+    cd rtg-tools-3.9.1 && ./rtg help
+
+ENV PATH="/rtg-tools-3.9.1:${PATH}"
+
+WORKDIR /
+
+# Installing bedops v2.4.35
+
+RUN wget https://github.com/bedops/bedops/releases/download/v2.4.35/bedops_linux_x86_64-v2.4.35.tar.bz2 && \
+    tar jxvf bedops_linux_x86_64-v2.4.35.tar.bz2 && \
+    cp bin/* /usr/local/bin
+
+WORKDIR /
+
+# Installing bedtools 2.27.1
+
+RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.27.1/bedtools-2.27.1.tar.gz && \
+    tar -xzvf bedtools-2.27.1.tar.gz && \
+    cd bedtools2 && make && cp bin/* /usr/local/bin
+
+WORKDIR /
+
+# Installing gvcf_regions
+
+RUN git clone https://github.com/lijiayong/gvcf_regions