From 93917ecafad9e6d811daf600ccf53991b1cbc86e Mon Sep 17 00:00:00 2001 From: Jiayong Li Date: Fri, 9 Dec 2022 21:03:13 +0000 Subject: [PATCH] Add cwl and docker files refs #19785 Arvados-DCO-1.1-Signed-off-by: Jiayong Li --- .licenseignore | 1 + cwl/annotation/annotate-wf.cwl | 83 ++++ cwl/annotation/bcftools-concat.cwl | 40 ++ cwl/annotation/getcount.cwl | 27 + cwl/annotation/getfiles.cwl | 59 +++ cwl/annotation/preprocess.cwl | 32 ++ cwl/annotation/snpeff-bcftools-annotate.cwl | 103 ++++ cwl/annotation/src/getcount.sh | 18 + cwl/annotation/src/totalcounts.sh | 28 ++ cwl/annotation/totalcounts.cwl | 26 + ...ate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml | 43 ++ cwl/annotation/yml/preprocess-test.yml | 8 + ...eff-bcftools-annotate-annotation_chr19.yml | 18 + ...bcftools-annotate-annotation_chr19_new.yml | 18 + ...eff-bcftools-annotate-annotation_chr22.yml | 18 + cwl/comparevcf/change-GT.cwl | 29 ++ .../comparevcf-original-lightning-wf.cwl | 66 +++ cwl/comparevcf/rtg-vcfeval.cwl | 35 ++ cwl/comparevcf/src/change-GT.sh | 15 + cwl/comparevcf/src/concatenate.sh | 18 + ...ing-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml | 23 + cwl/gvcf2fasta/append-sampleid.cwl | 18 + cwl/gvcf2fasta/bcftools-consensus.cwl | 44 ++ cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl | 52 ++ cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl | 49 ++ cwl/gvcf2fasta/get_bed_varonlyvcf.cwl | 80 +++ cwl/gvcf2fasta/getfiles.cwl | 37 ++ cwl/gvcf2fasta/gvcf2fasta-wf.cwl | 62 +++ .../gvcf2fasta_nonrefvcf-imputation-wf.cwl | 78 +++ cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl | 54 ++ .../gvcf2fasta_splitvcf-imputation-wf.cwl | 82 ++++ cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl | 56 +++ cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl | 62 +++ cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl | 55 +++ .../scatter-gvcf2fasta_nonrefvcf-wf.cwl | 57 +++ ...tter-gvcf2fasta_splitvcf-imputation-wf.cwl | 65 +++ .../scatter-gvcf2fasta_splitvcf-wf.cwl | 55 +++ .../scatter-gvcf2fasta_splitvcftar-wf.cwl | 57 +++ 
cwl/gvcf2fasta/src/bcftools-consensus.sh | 18 + .../src/concat-get_bed_varonlyvcf.sh | 25 + .../src/fixvcf-get_bed_varonlyvcf.sh | 21 + cwl/gvcf2fasta/src/untar-concat.sh | 21 + .../untar-concat-get_bed_varonlyvcf.cwl | 97 ++++ ...000196-BL-NCR-14AD66938_phased_imputed.yml | 15 + .../yml/bcftools-consensus-test-newadni.yml | 15 + .../yml/bcftools-consensus-test-varonly.yml | 15 + .../yml/bcftools-consensus-test.yml | 15 + ...nlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml | 12 + .../yml/get_bed_varonlyvcf-test.yml | 11 + .../yml/gvcf2fasta-wf-test-newadni.yml | 15 + cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml | 15 + cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml | 15 + ...ion-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml | 25 + ...vcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml | 15 + ...on-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml | 28 ++ ...ion-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml | 25 + ...vcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml | 15 + .../yml/gvcf2fasta_splitvcftar-wf-test.yml | 15 + .../yml/scatter-gvcf2fasta-wf-16gvcf.yml | 13 + .../yml/scatter-gvcf2fasta-wf-test.yml | 13 + ...f2fasta_splitvcf-imputation-wf-100test.yml | 326 +++++++++++++ .../untar-concat-get_bed_varonlyvcf-test.yml | 12 + cwl/imputation/bcftools-concat.cwl | 36 ++ cwl/imputation/beagle.cwl | 65 +++ cwl/imputation/bedtools-intersect.cwl | 24 + cwl/imputation/bedtools-subtract.cwl | 24 + cwl/imputation/get-imputedvcf.cwl | 45 ++ cwl/imputation/get-phasedvcf.cwl | 41 ++ cwl/imputation/imputation-wf.cwl | 58 +++ cwl/imputation/match-ref-map-chr.cwl | 42 ++ cwl/imputation/merge-phased-imputed-wf.cwl | 69 +++ .../rtg-vcffilter-bedtools-intersect.cwl | 54 ++ cwl/imputation/rtg-vcffilter.cwl | 31 ++ cwl/imputation/rtg-vcfmerge.cwl | 36 ++ cwl/imputation/scatter-beagle-wf.cwl | 48 ++ ...tation-A-PRHS-PR000971-BL-COL-47620BL1.yml | 27 + ...filter-A-PRHS-PR000971-BL-COL-47620BL1.yml | 11 + cwl/imputation/yml/rtg-vcffilter-test.yml | 15 + cwl/lightning/batch-dirs.cwl | 31 ++ cwl/lightning/fasta2numpy-multi-wf.cwl | 138 
++++++ cwl/lightning/fasta2numpy-wf.cwl | 226 +++++++++ cwl/lightning/genreadme.cwl | 27 + .../imputation-gvcf2fasta-fasta2numpy-wf.cwl | 119 +++++ cwl/lightning/libray2numpy-wf.cwl | 138 ++++++ cwl/lightning/lightning-anno2vcf.cwl | 36 ++ cwl/lightning/lightning-choose-samples.cwl | 53 ++ cwl/lightning/lightning-import.cwl | 56 +++ cwl/lightning/lightning-plot.cwl | 53 ++ .../lightning-slice-numpy-anno2vcf-wf.cwl | 113 +++++ .../lightning-slice-numpy-onehot.cwl | 67 +++ cwl/lightning/lightning-slice-numpy-pca.cwl | 75 +++ cwl/lightning/lightning-slice-numpy.cwl | 68 +++ cwl/lightning/lightning-slice.cwl | 40 ++ cwl/lightning/lightning-tiling-stats.cwl | 35 ++ cwl/lightning/make-arrays.cwl | 63 +++ cwl/lightning/make-fastadirs.cwl | 35 ++ cwl/lightning/make-libname.cwl | 18 + cwl/lightning/readme.md | 17 + cwl/lightning/src/genreadme.py | 142 ++++++ cwl/lightning/stage-output.cwl | 40 ++ .../yml/fasta2numpy-multi-wf-0831_0315.yml | 353 ++++++++++++++ .../fasta2numpy-multi-wf-0831_0315_ADNI.yml | 353 ++++++++++++++ ...lti-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml | 59 +++ .../yml/fasta2numpy-multi-wf-public.yml | 327 +++++++++++++ .../yml/fasta2numpy-multi-wf-test.yml | 41 ++ .../yml/fasta2numpy-multi-wf-testonehot.yml | 68 +++ .../yml/fasta2numpy-wf-0831_0315.yml | 357 ++++++++++++++ cwl/lightning/yml/fasta2numpy-wf-100test.yml | 63 +++ ...tion-gvcf2fasta-fasta2numpy-wf-100test.yml | 337 +++++++++++++ .../yml/lightning-export-numpy-merged.yml | 8 + cwl/lightning/yml/lightning-import-ref37.yml | 11 + cwl/lightning/yml/lightning-import-ref38.yml | 11 + .../yml/lightning-import-testdata.yml | 15 + .../yml/lightning-merge-testdata_ref38.yml | 10 + ...ning-slice-numpy-anno2vcf-wf-0831_0315.yml | 12 + ...lightning-slice-numpy-anno2vcf-wf-test.yml | 12 + .../lightning-slice-numpy-onehot-A-ADC.yml | 14 + .../lightning-slice-numpy-onehot-A-CUHS.yml | 14 + .../lightning-slice-numpy-onehot-A-IIAA.yml | 14 + .../yml/lightning-slice-numpy-onehot-ADNI.yml | 14 + 
.../yml/lightning-slice-numpy-onehot-full.yml | 14 + ...ing-slice-numpy-onehot-pvalue1e-8-full.yml | 14 + .../yml/lightning-tiling-stats-ref37.yml | 7 + cwl/preprocess/cgivar/bedtools-intersect.cwl | 45 ++ cwl/preprocess/cgivar/cgatools-mkvcf.cwl | 29 ++ cwl/preprocess/cgivar/cgatools-mkvcf.sh | 10 + cwl/preprocess/cgivar/cgivar2vcfbed-wf.cwl | 69 +++ cwl/preprocess/cgivar/fix_vcf.cwl | 24 + cwl/preprocess/cgivar/fix_vcf.py | 45 ++ cwl/preprocess/cgivar/getfiles.cwl | 34 ++ cwl/preprocess/cgivar/gvcf_regions.cwl | 20 + .../cgivar/scatter-cgivar2vcfbed-wf.cwl | 62 +++ .../cgivar/yml/cgivar2vcfbed-wf-test.yml | 17 + .../yml/scatter-cgivar2vcfbed-wf-367set.yml | 10 + cwl/preprocess/chrmvcf/change_gt_M.js | 16 + cwl/preprocess/chrmvcf/change_gt_chrM.js | 16 + cwl/preprocess/chrmvcf/fixchrm-wf.cwl | 36 ++ cwl/preprocess/chrmvcf/fixchrm.cwl | 41 ++ cwl/preprocess/chrmvcf/getfiles.cwl | 28 ++ cwl/preprocess/chrmvcf/yml/fixchrm-test.yml | 10 + .../chrmvcf/yml/fixchrm-wf-1kcgi.yml | 10 + .../chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml | 10 + .../chrmvcf/yml/fixchrm-wf-simons.yml | 10 + cwl/preprocess/gvcf/filtercleangvcf-wf.cwl | 48 ++ cwl/preprocess/gvcf/filtercleangvcf.cwl | 75 +++ cwl/preprocess/gvcf/getfiles.cwl | 41 ++ cwl/preprocess/gvcf/src/cleanvcf.py | 40 ++ cwl/preprocess/gvcf/src/filter-gvcf | 60 +++ .../gvcf/yml/filtercleangvcf-test.yml | 8 + .../gvcf/yml/filtercleangvcf-wf-1kcgi.yml | 8 + .../yml/filtercleangvcf-wf-vcfbed-test.yml | 8 + cwl/preprocess/gvcf/yml/keepGQdot-test.yml | 9 + cwl/preprocess/haploidvcf/change_gt.js | 14 + cwl/preprocess/haploidvcf/fixgt-wf.cwl | 32 ++ cwl/preprocess/haploidvcf/fixgt.cwl | 55 +++ cwl/preprocess/haploidvcf/getfiles.cwl | 28 ++ cwl/preprocess/haploidvcf/yml/fixgt-test.yml | 7 + .../haploidvcf/yml/fixgt-wf-pgpcanada.yml | 7 + cwl/preprocess/nonrefvcf/fixnonref-wf.cwl | 32 ++ cwl/preprocess/nonrefvcf/fixnonref.cwl | 46 ++ cwl/preprocess/nonrefvcf/getfiles.cwl | 28 ++ .../nonrefvcf/yml/fixnonref-test.yml | 7 + 
.../nonrefvcf/yml/fixnonref-wf-1kgvcf.yml | 7 + .../nonrefvcf/yml/fixnonref-wf-ukpgp.yml | 7 + .../portablevcf/bcftools-annotate.cwl | 38 ++ .../portablevcf/bcftools-reheader.cwl | 30 ++ cwl/preprocess/portablevcf/cat.cwl | 22 + cwl/preprocess/portablevcf/getfiles.cwl | 28 ++ cwl/preprocess/portablevcf/header | 104 ++++ .../portablevcf/preprocess-portablevcf-wf.cwl | 69 +++ cwl/preprocess/portablevcf/rtg-vcfeval.cwl | 38 ++ .../scatter-preprocess-portablevcf-wf.cwl | 60 +++ cwl/preprocess/portablevcf/sort-clean.cwl | 46 ++ .../yml/bcftools-annotate-test.yml | 7 + .../yml/preprocess-portablevcf-wf.yml | 16 + .../yml/scatter-preprocess-portablevcf-wf.yml | 10 + cwl/preprocess/portablevcf/yml/sort-clean.cwl | 10 + cwl/preprocess/simons/filter-vcf.cwl | 53 ++ cwl/preprocess/simons/getfiles.cwl | 34 ++ cwl/preprocess/simons/make-bed.cwl | 58 +++ cwl/preprocess/simons/make-vcf-bed-wf.cwl | 64 +++ cwl/preprocess/simons/yml/filter-vcf-test.yml | 10 + cwl/preprocess/simons/yml/make-bed-test.yml | 10 + .../simons/yml/make-vcf-bed-wf-simons.yml | 12 + cwl/preprocess/splitvcf/concatvcf-wf.cwl | 33 ++ cwl/preprocess/splitvcf/concatvcf.cwl | 40 ++ cwl/preprocess/splitvcf/src/concatvcf.sh | 20 + .../splitvcf/yml/concatvcf-test.yml | 7 + .../splitvcf/yml/concatvcf-wf-test.yml | 461 ++++++++++++++++++ cwl/preprocess/vcfbed/get-vcfbed.cwl | 62 +++ cwl/preprocess/vcfbed/intersect-vcfbed.cwl | 52 ++ cwl/preprocess/vcfbed/sort-bed.cwl | 32 ++ cwl/preprocess/vcfbed/sort-vcf.cwl | 42 ++ .../vcfbed/src/convert-vcf-bed-to-gvcf | 66 +++ cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl | 49 ++ cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl | 72 +++ cwl/preprocess/vcfbed/vcfbed2gvcf.cwl | 55 +++ cwl/preprocess/vcfbed/yml/single-bed.yml | 13 + .../vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml | 10 + .../yml/vcfbed2gvcf-wf-cgi-5samples.yml | 10 + .../vcfbed/yml/vcfbed2gvcf-wf-cgi.yml | 10 + docker/beagle5.4/Dockerfile | 59 +++ docker/cgivar2vcfbed/Dockerfile | 40 ++ docker/lightning/Dockerfile | 47 ++ 
docker/snpeff/Dockerfile | 55 +++ docker/vcfutil/Dockerfile | 66 +++ 206 files changed, 9941 insertions(+) create mode 100644 cwl/annotation/annotate-wf.cwl create mode 100644 cwl/annotation/bcftools-concat.cwl create mode 100644 cwl/annotation/getcount.cwl create mode 100644 cwl/annotation/getfiles.cwl create mode 100644 cwl/annotation/preprocess.cwl create mode 100644 cwl/annotation/snpeff-bcftools-annotate.cwl create mode 100755 cwl/annotation/src/getcount.sh create mode 100755 cwl/annotation/src/totalcounts.sh create mode 100644 cwl/annotation/totalcounts.cwl create mode 100644 cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/annotation/yml/preprocess-test.yml create mode 100644 cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml create mode 100644 cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml create mode 100644 cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml create mode 100644 cwl/comparevcf/change-GT.cwl create mode 100644 cwl/comparevcf/comparevcf-original-lightning-wf.cwl create mode 100644 cwl/comparevcf/rtg-vcfeval.cwl create mode 100755 cwl/comparevcf/src/change-GT.sh create mode 100755 cwl/comparevcf/src/concatenate.sh create mode 100644 cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/gvcf2fasta/append-sampleid.cwl create mode 100644 cwl/gvcf2fasta/bcftools-consensus.cwl create mode 100644 cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl create mode 100644 cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl create mode 100644 cwl/gvcf2fasta/get_bed_varonlyvcf.cwl create mode 100644 cwl/gvcf2fasta/getfiles.cwl create mode 100644 cwl/gvcf2fasta/gvcf2fasta-wf.cwl create mode 100644 cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl create mode 100644 cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl create mode 100644 cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl create mode 100644 
cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl create mode 100644 cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl create mode 100644 cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl create mode 100644 cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl create mode 100644 cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl create mode 100644 cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl create mode 100644 cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl create mode 100755 cwl/gvcf2fasta/src/bcftools-consensus.sh create mode 100755 cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh create mode 100755 cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh create mode 100755 cwl/gvcf2fasta/src/untar-concat.sh create mode 100644 cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl create mode 100644 cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml create mode 100644 cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml create mode 100644 cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml create mode 100644 cwl/gvcf2fasta/yml/bcftools-consensus-test.yml create mode 100644 cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml create mode 100644 cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml create mode 100644 cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml create mode 100644 
cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml create mode 100644 cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml create mode 100644 cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml create mode 100644 cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml create mode 100644 cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml create mode 100644 cwl/imputation/bcftools-concat.cwl create mode 100644 cwl/imputation/beagle.cwl create mode 100644 cwl/imputation/bedtools-intersect.cwl create mode 100644 cwl/imputation/bedtools-subtract.cwl create mode 100644 cwl/imputation/get-imputedvcf.cwl create mode 100644 cwl/imputation/get-phasedvcf.cwl create mode 100644 cwl/imputation/imputation-wf.cwl create mode 100644 cwl/imputation/match-ref-map-chr.cwl create mode 100644 cwl/imputation/merge-phased-imputed-wf.cwl create mode 100644 cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl create mode 100644 cwl/imputation/rtg-vcffilter.cwl create mode 100644 cwl/imputation/rtg-vcfmerge.cwl create mode 100644 cwl/imputation/scatter-beagle-wf.cwl create mode 100644 cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/imputation/yml/rtg-vcffilter-test.yml create mode 100644 cwl/lightning/batch-dirs.cwl create mode 100644 cwl/lightning/fasta2numpy-multi-wf.cwl create mode 100644 cwl/lightning/fasta2numpy-wf.cwl create mode 100644 cwl/lightning/genreadme.cwl create mode 100644 cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl create mode 100644 cwl/lightning/libray2numpy-wf.cwl create mode 100644 cwl/lightning/lightning-anno2vcf.cwl create mode 100644 cwl/lightning/lightning-choose-samples.cwl create mode 100644 cwl/lightning/lightning-import.cwl create mode 100644 cwl/lightning/lightning-plot.cwl create mode 100644 cwl/lightning/lightning-slice-numpy-anno2vcf-wf.cwl create mode 100644 
cwl/lightning/lightning-slice-numpy-onehot.cwl create mode 100644 cwl/lightning/lightning-slice-numpy-pca.cwl create mode 100644 cwl/lightning/lightning-slice-numpy.cwl create mode 100644 cwl/lightning/lightning-slice.cwl create mode 100644 cwl/lightning/lightning-tiling-stats.cwl create mode 100644 cwl/lightning/make-arrays.cwl create mode 100644 cwl/lightning/make-fastadirs.cwl create mode 100644 cwl/lightning/make-libname.cwl create mode 100644 cwl/lightning/readme.md create mode 100755 cwl/lightning/src/genreadme.py create mode 100644 cwl/lightning/stage-output.cwl create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-public.yml create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-test.yml create mode 100644 cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml create mode 100644 cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml create mode 100644 cwl/lightning/yml/fasta2numpy-wf-100test.yml create mode 100644 cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml create mode 100644 cwl/lightning/yml/lightning-export-numpy-merged.yml create mode 100644 cwl/lightning/yml/lightning-import-ref37.yml create mode 100644 cwl/lightning/yml/lightning-import-ref38.yml create mode 100644 cwl/lightning/yml/lightning-import-testdata.yml create mode 100644 cwl/lightning/yml/lightning-merge-testdata_ref38.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml create mode 100644 
cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml create mode 100644 cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml create mode 100644 cwl/lightning/yml/lightning-tiling-stats-ref37.yml create mode 100644 cwl/preprocess/cgivar/bedtools-intersect.cwl create mode 100644 cwl/preprocess/cgivar/cgatools-mkvcf.cwl create mode 100755 cwl/preprocess/cgivar/cgatools-mkvcf.sh create mode 100644 cwl/preprocess/cgivar/cgivar2vcfbed-wf.cwl create mode 100644 cwl/preprocess/cgivar/fix_vcf.cwl create mode 100755 cwl/preprocess/cgivar/fix_vcf.py create mode 100644 cwl/preprocess/cgivar/getfiles.cwl create mode 100644 cwl/preprocess/cgivar/gvcf_regions.cwl create mode 100644 cwl/preprocess/cgivar/scatter-cgivar2vcfbed-wf.cwl create mode 100644 cwl/preprocess/cgivar/yml/cgivar2vcfbed-wf-test.yml create mode 100644 cwl/preprocess/cgivar/yml/scatter-cgivar2vcfbed-wf-367set.yml create mode 100644 cwl/preprocess/chrmvcf/change_gt_M.js create mode 100644 cwl/preprocess/chrmvcf/change_gt_chrM.js create mode 100644 cwl/preprocess/chrmvcf/fixchrm-wf.cwl create mode 100644 cwl/preprocess/chrmvcf/fixchrm.cwl create mode 100644 cwl/preprocess/chrmvcf/getfiles.cwl create mode 100644 cwl/preprocess/chrmvcf/yml/fixchrm-test.yml create mode 100644 cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml create mode 100644 cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml create mode 100644 cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml create mode 100644 cwl/preprocess/gvcf/filtercleangvcf-wf.cwl create mode 100644 cwl/preprocess/gvcf/filtercleangvcf.cwl create mode 100644 cwl/preprocess/gvcf/getfiles.cwl create mode 100755 cwl/preprocess/gvcf/src/cleanvcf.py create mode 100755 cwl/preprocess/gvcf/src/filter-gvcf create mode 100644 cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml create mode 100644 cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml create mode 100644 
cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml create mode 100644 cwl/preprocess/gvcf/yml/keepGQdot-test.yml create mode 100644 cwl/preprocess/haploidvcf/change_gt.js create mode 100644 cwl/preprocess/haploidvcf/fixgt-wf.cwl create mode 100644 cwl/preprocess/haploidvcf/fixgt.cwl create mode 100644 cwl/preprocess/haploidvcf/getfiles.cwl create mode 100644 cwl/preprocess/haploidvcf/yml/fixgt-test.yml create mode 100644 cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml create mode 100644 cwl/preprocess/nonrefvcf/fixnonref-wf.cwl create mode 100644 cwl/preprocess/nonrefvcf/fixnonref.cwl create mode 100644 cwl/preprocess/nonrefvcf/getfiles.cwl create mode 100644 cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml create mode 100644 cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml create mode 100644 cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml create mode 100644 cwl/preprocess/portablevcf/bcftools-annotate.cwl create mode 100644 cwl/preprocess/portablevcf/bcftools-reheader.cwl create mode 100644 cwl/preprocess/portablevcf/cat.cwl create mode 100644 cwl/preprocess/portablevcf/getfiles.cwl create mode 100644 cwl/preprocess/portablevcf/header create mode 100644 cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl create mode 100644 cwl/preprocess/portablevcf/rtg-vcfeval.cwl create mode 100644 cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl create mode 100644 cwl/preprocess/portablevcf/sort-clean.cwl create mode 100644 cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml create mode 100644 cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml create mode 100644 cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml create mode 100644 cwl/preprocess/portablevcf/yml/sort-clean.cwl create mode 100644 cwl/preprocess/simons/filter-vcf.cwl create mode 100644 cwl/preprocess/simons/getfiles.cwl create mode 100644 cwl/preprocess/simons/make-bed.cwl create mode 100644 cwl/preprocess/simons/make-vcf-bed-wf.cwl create 
mode 100644 cwl/preprocess/simons/yml/filter-vcf-test.yml create mode 100644 cwl/preprocess/simons/yml/make-bed-test.yml create mode 100644 cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml create mode 100644 cwl/preprocess/splitvcf/concatvcf-wf.cwl create mode 100644 cwl/preprocess/splitvcf/concatvcf.cwl create mode 100755 cwl/preprocess/splitvcf/src/concatvcf.sh create mode 100644 cwl/preprocess/splitvcf/yml/concatvcf-test.yml create mode 100644 cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml create mode 100644 cwl/preprocess/vcfbed/get-vcfbed.cwl create mode 100644 cwl/preprocess/vcfbed/intersect-vcfbed.cwl create mode 100644 cwl/preprocess/vcfbed/sort-bed.cwl create mode 100644 cwl/preprocess/vcfbed/sort-vcf.cwl create mode 100755 cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf create mode 100644 cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl create mode 100644 cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl create mode 100644 cwl/preprocess/vcfbed/vcfbed2gvcf.cwl create mode 100644 cwl/preprocess/vcfbed/yml/single-bed.yml create mode 100644 cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml create mode 100644 cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml create mode 100644 cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml create mode 100644 docker/beagle5.4/Dockerfile create mode 100644 docker/cgivar2vcfbed/Dockerfile create mode 100644 docker/lightning/Dockerfile create mode 100644 docker/snpeff/Dockerfile create mode 100644 docker/vcfutil/Dockerfile diff --git a/.licenseignore b/.licenseignore index acc2c05c1e..0d77b26cc4 100644 --- a/.licenseignore +++ b/.licenseignore @@ -3,3 +3,4 @@ AUTHORS go.mod go.sum testdata/* +readme.md diff --git a/cwl/annotation/annotate-wf.cwl b/cwl/annotation/annotate-wf.cwl new file mode 100644 index 0000000000..0e6971b73b --- /dev/null +++ b/cwl/annotation/annotate-wf.cwl @@ -0,0 +1,83 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+requirements:
+  ScatterFeatureRequirement: {}  # required by the scattered steps below
+
+inputs:
+  sample: string            # base name for per-chromosome samples and final outputs
+  chrs: string[]            # chromosome names used to select files by basename
+  vcfdir: Directory         # searched by getfiles for one VCF per chromosome
+  snpeffdatadir: Directory  # handed to SnpEff as its -dataDir
+  genomeversion: string     # genome argument given to SnpEff
+  dbsnp:
+    type: File
+    secondaryFiles: [".csi"]
+  gnomaddir: Directory      # searched by getfiles for one gnomAD file per chromosome
+
+outputs:
+  annotatedvcf:
+    outputSource: bcftools-concat/vcf
+    type: File
+    secondaryFiles: [".tbi"]
+  summary:
+    outputSource: totalcounts/summary
+    type: File
+
+steps:
+  getfiles:  # expands chrs into parallel arrays: samples, vcfs, gnomads
+    run: getfiles.cwl
+    in:
+      sample: sample
+      chrs: chrs
+      vcfdir: vcfdir
+      gnomaddir: gnomaddir
+    out: [samples, vcfs, gnomads]
+
+  preprocess:  # one job per chromosome
+    run: preprocess.cwl
+    in:
+      sample: getfiles/samples
+      vcf: getfiles/vcfs
+    scatter: [sample, vcf]
+    scatterMethod: dotproduct
+    out: [trimmedvcf]
+
+  snpeff-bcftools-annotate:  # one job per chromosome
+    run: snpeff-bcftools-annotate.cwl
+    in:
+      vcf: preprocess/trimmedvcf
+      sample: getfiles/samples
+      snpeffdatadir: snpeffdatadir
+      genomeversion: genomeversion
+      dbsnp: dbsnp
+      gnomad: getfiles/gnomads
+    scatter: [sample, vcf, gnomad]
+    scatterMethod: dotproduct
+    out: [annotatedvcf]
+
+  bcftools-concat:  # gathers the per-chromosome annotated VCFs into one file
+    run: bcftools-concat.cwl
+    in:
+      sample: sample
+      vcfs: snpeff-bcftools-annotate/annotatedvcf
+    out: [vcf]
+
+  getcount:  # one count file per chromosome
+    run: getcount.cwl
+    in:
+      sample: getfiles/samples
+      vcf: snpeff-bcftools-annotate/annotatedvcf
+    scatter: [sample, vcf]
+    scatterMethod: dotproduct
+    out: [count]
+
+  totalcounts:  # sums the per-chromosome counts into one summary
+    run: totalcounts.cwl
+    in:
+      sample: sample
+      counts: getcount/count
+    out: [summary]
diff --git a/cwl/annotation/bcftools-concat.cwl b/cwl/annotation/bcftools-concat.cwl
new file mode 100644
index 0000000000..fe3b78bb1a
--- /dev/null
+++ b/cwl/annotation/bcftools-concat.cwl
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  ShellCommandRequirement: {}  # the command line chains two programs with &&
+hints:
+  DockerRequirement:
+    dockerPull: snpeff4.3t
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 10000
+  arv:RuntimeConstraints:
+    keep_cache: 20000
+    outputDirType: keep_output_dir
+inputs:
+  sample: string  # prefix of the concatenated output file name
+  vcfs:
+    type: File[]
+    secondaryFiles: [".tbi"]
+outputs:
+  vcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [".tbi"]
+baseCommand: [bcftools, concat]
+arguments:
+  - $(inputs.vcfs)
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - valueFrom: "&&"  # unquoted so the shell treats it as an operator
+    shellQuote: false
+  - tabix
+  - $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
diff --git a/cwl/annotation/getcount.cwl b/cwl/annotation/getcount.cwl
new file mode 100644
index 0000000000..f2437749d1
--- /dev/null
+++ b/cwl/annotation/getcount.cwl
@@ -0,0 +1,27 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # label printed at the start of the count line
+  vcf: File       # annotated VCF to count
+  bashscript:     # the script is the executable; there is no baseCommand
+    type: File
+    default:
+      class: File
+      location: src/getcount.sh
+outputs:
+  count:
+    type: stdout  # captured into the file named by stdout below
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.sample)
+  - $(inputs.vcf)
+stdout: $(inputs.sample).txt
diff --git a/cwl/annotation/getfiles.cwl b/cwl/annotation/getfiles.cwl
new file mode 100644
index 0000000000..da14c89abe
--- /dev/null
+++ b/cwl/annotation/getfiles.cwl
@@ -0,0 +1,59 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  sample: string
+  chrs: string[]
+  vcfdir: Directory
+  gnomaddir: Directory
+outputs:
+  samples: string[]
+  vcfs: File[]
+  gnomads:
+    type: File[]
+    secondaryFiles: [.csi]
+expression: |
+  ${
+    /* Return the first non-index file in listing whose basename contains
+       .CHR. and fail loudly instead of reusing an earlier match. */
+    function pick(listing, chr, what) {
+      for (var k = 0; k < listing.length; k++) {
+        var name = listing[k].basename;
+        var isIndex = name.slice(-4) == ".csi" || name.slice(-4) == ".tbi";
+        if (!isIndex && name.indexOf("."+chr+".") != -1) {
+          return listing[k];
+        }
+      }
+      throw new Error("getfiles: no "+what+" found for "+chr);
+    }
+
+    var samples = [];
+    var vcfs = [];
+    var gnomads = [];
+    for (var i = 0; i < inputs.chrs.length; i++) {
+      var chr = inputs.chrs[i];
+      var vcf = pick(inputs.vcfdir.listing, chr, "VCF");
+      var gnomad = pick(inputs.gnomaddir.listing, chr, "gnomAD file");
+      /* Attach the matching .csi index so it travels with the gnomAD file. */
+      for (var j = 0; j < inputs.gnomaddir.listing.length; j++) {
+        var file = inputs.gnomaddir.listing[j];
+        if (file.basename == gnomad.basename+".csi") {
+          gnomad.secondaryFiles = [file];
+          break;
+        }
+      }
+      samples.push(inputs.sample+"."+chr);
+      vcfs.push(vcf);
+      gnomads.push(gnomad);
+    }
+
+    return {"samples": samples, "vcfs": vcfs, "gnomads": gnomads};
+  }
diff --git a/cwl/annotation/preprocess.cwl b/cwl/annotation/preprocess.cwl
new file mode 100644
index 0000000000..b32f93acd7
--- /dev/null
+++ b/cwl/annotation/preprocess.cwl
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  ShellCommandRequirement: {}  # the command line uses a pipe and a redirect
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sample: string  # prefix of the output file name
+  vcf: File
+outputs:
+  trimmedvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"
+baseCommand: awk
+arguments:
+  - '{if ($1 ~ /^#/ || $4 != $5) print $0}'
+  - $(inputs.vcf)
+  - valueFrom: "|"
+    shellQuote: false
+  - bgzip
+  - "-c"
+  - valueFrom: ">"
+    shellQuote: false
+  - $(inputs.sample).vcf.gz
diff --git a/cwl/annotation/snpeff-bcftools-annotate.cwl b/cwl/annotation/snpeff-bcftools-annotate.cwl
new file mode 100644
index 0000000000..d7b1e816dd
--- /dev/null
+++ b/cwl/annotation/snpeff-bcftools-annotate.cwl
@@ -0,0 +1,103 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Run SnpEff on given VCF and use bcftools to annotate with dbSNP and gnomAD
+requirements:
+  ShellCommandRequirement: {}  # one shell line: pipe, redirect and && chaining
+hints:
+  DockerRequirement:
+    dockerPull: snpeff4.3t
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 20000
+    tmpdirMin: 16000
+inputs:
+  vcf:
+    label: Input VCF
+    type: File
+  sample:
+    label: Sample name
+    type: string
+  snpeffdatadir:
+    label: Database directory for SnpEff
+    type: Directory
+  genomeversion:
+    label: Genome version
+    type: string
+  dbsnp:
+    label: dbSNP BCF
+    type: File
+    secondaryFiles: [".csi"]
+  gnomad:
+    label: gnomAD BCF
+    type: File
+    secondaryFiles: [".csi"]
+outputs:
+  annotatedvcf:
+    label: Annotated VCF
+    type: File
+    outputBinding:
+      glob: "*_snpeff_dbsnp_gnomad.vcf.gz"
+    secondaryFiles: [".tbi"]
+baseCommand: [java]
+arguments:
+  - -Xmx$(runtime.ram)m
+  - prefix: "-jar"
+    valueFrom: "/snpEff/snpEff.jar"
+  - prefix: "-dataDir"
+    valueFrom: $(inputs.snpeffdatadir)
+  - $(inputs.genomeversion)
+  - $(inputs.vcf)
+  - valueFrom: "|"  # stage 1: SnpEff output is compressed to SAMPLE_snpeff.vcf.gz
+    shellQuote: false
+  - bgzip
+  - "-c"
+  - valueFrom: ">"
+    shellQuote: false
+  - $(inputs.sample)_snpeff.vcf.gz
+  - valueFrom: "&&"
+    shellQuote: false
+  - tabix
+  - $(inputs.sample)_snpeff.vcf.gz
+  - valueFrom: "&&"  # stage 2: annotate from the dbSNP file
+    shellQuote: false
+  - bcftools
+  - annotate
+  - prefix: "--annotations"
+    valueFrom: $(inputs.dbsnp)
+  - prefix: "--columns"
+    valueFrom: "=ID"
+  - $(inputs.sample)_snpeff.vcf.gz
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - valueFrom: "&&"
+    shellQuote: false
+  - tabix
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - valueFrom: "&&"  # stage 3: annotate from the gnomAD file
+    shellQuote: false
+  - bcftools
+  - annotate
+  - prefix: "--annotations"
+    valueFrom: $(inputs.gnomad)
+  - prefix: "--columns"
+    valueFrom: "INFO/AC,INFO/AN,INFO/AF,INFO/AF_afr,INFO/AF_amr,INFO/AF_asj,INFO/AF_eas,INFO/AF_fin,INFO/AF_nfe,INFO/AF_oth"
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - "-Oz"
+  - prefix: "-o"
+    valueFrom: $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - valueFrom: "&&"
+    shellQuote: false
+  - tabix
+  - $(inputs.sample)_snpeff_dbsnp_gnomad.vcf.gz
+  - valueFrom: "&&"  # cleanup: remove the intermediate files and their indexes
+    shellQuote: false
+  - rm
+  - $(inputs.sample)_snpeff.vcf.gz
+  - $(inputs.sample)_snpeff.vcf.gz.tbi
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz
+  - $(inputs.sample)_snpeff_dbsnp.vcf.gz.tbi
diff --git a/cwl/annotation/src/getcount.sh b/cwl/annotation/src/getcount.sh
new file mode 100755
index 0000000000..3641d697f3
--- /dev/null
+++ b/cwl/annotation/src/getcount.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+set -o pipefail
+
+sample=$1
+vcf=$2
+
+total=`zcat "$vcf" | awk '!(/^#/)' | wc -l`
+rsid=`zcat "$vcf" | awk '(!(/^#/) && /rs/)' | wc -l`
+gnomad=`zcat "$vcf" | awk '(!(/^#/) && /AF/)' | wc -l`
+rsidpercentage=`awk -v n="$rsid" -v d="$total" 'BEGIN {print (d > 0 ? n/d*100 : 0)}'`  # report 0 instead of dividing by zero when the VCF has no records
+gnomadpercentage=`awk -v n="$gnomad" -v d="$total" 'BEGIN {print (d > 0 ? n/d*100 : 0)}'`
+
+echo "$sample: $total total variants, $rsid variants ($rsidpercentage%) have rsID, $gnomad variants ($gnomadpercentage%) have gnomad AF"
diff --git a/cwl/annotation/src/totalcounts.sh b/cwl/annotation/src/totalcounts.sh
new file mode 100755
index 0000000000..ac9ed9f4e7
--- /dev/null
+++ b/cwl/annotation/src/totalcounts.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+set -o pipefail
+
+counts=( "$@" )  # one-line summary files, each in the format echoed by getcount.sh
+
+export allrsid="0"
+export allgnomad="0"
+export alltotal="0"
+
+cat "${counts[@]}"
+
+for count in "${counts[@]}"; do
+  rsid=`cut -d' ' -f5 "$count"`      # field 5: variants with rsID
+  gnomad=`cut -d' ' -f10 "$count"`   # field 10: variants with gnomad AF
+  total=`cut -d' ' -f2 "$count"`     # field 2: total variants
+  allrsid=$((allrsid + rsid))
+  allgnomad=$((allgnomad + gnomad))
+  alltotal=$((alltotal + total))
+done
+rsidpercentage=`awk -v n="$allrsid" -v d="$alltotal" 'BEGIN {print (d > 0 ? n/d*100 : 0)}'`  # report 0 instead of dividing by zero when no variants were counted
+gnomadpercentage=`awk -v n="$allgnomad" -v d="$alltotal" 'BEGIN {print (d > 0 ? n/d*100 : 0)}'`
+
+echo "overall: $alltotal total variants, $allrsid variants ($rsidpercentage%) have rsID, $allgnomad variants ($gnomadpercentage%) have gnomad AF"
diff --git a/cwl/annotation/totalcounts.cwl b/cwl/annotation/totalcounts.cwl
new file mode 100644
index 0000000000..2994a24367
--- /dev/null
+++ b/cwl/annotation/totalcounts.cwl
@@ -0,0 +1,26 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + counts: File[] + bashscript: + type: File + default: + class: File + location: src/totalcounts.sh +outputs: + summary: + type: stdout +arguments: + - $(inputs.bashscript) + - $(inputs.counts) +stdout: $(inputs.sample)_summary.txt diff --git a/cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..b9e43a273c --- /dev/null +++ b/cwl/annotation/yml/annotate-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,43 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sample: "annotations" +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +vcfdir: + class: Directory + location: keep:72c196515d6c449abc1fdbf1ead2a33e+1779 +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 diff --git a/cwl/annotation/yml/preprocess-test.yml b/cwl/annotation/yml/preprocess-test.yml new file mode 100644 index 0000000000..db34e6d2d2 --- /dev/null +++ b/cwl/annotation/yml/preprocess-test.yml @@ -0,0 +1,8 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +sample: "annotations" +vcf: + class: File + location: keep:3ba1b5c2ae01113baba4f9e69cd4b1bb+1566/annotations.chr1.vcf diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml new file mode 100644 index 0000000000..b0c3683fce --- /dev/null +++ b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19.yml @@ -0,0 +1,18 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:42c61d54769fec3e54b73901c817c16c+10426/annotations.chr19.vcf +sample: "annotations.chr19" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomad: + class: File + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135/gnomad.genomes.v3.1.2.sites.chr19.bcf diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml new file mode 100644 index 0000000000..b0c3683fce --- /dev/null +++ b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr19_new.yml @@ -0,0 +1,18 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:42c61d54769fec3e54b73901c817c16c+10426/annotations.chr19.vcf +sample: "annotations.chr19" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomad: + class: File + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135/gnomad.genomes.v3.1.2.sites.chr19.bcf diff --git a/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml new file mode 100644 index 0000000000..d77903f206 --- /dev/null +++ b/cwl/annotation/yml/snpeff-bcftools-annotate-annotation_chr22.yml @@ -0,0 +1,18 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:0f0a172c371fddeee3c3d1a500bbcf66+4438/annotations.chr22.vcf.gz +sample: "annotations.chr22" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomad: + class: File + location: keep:3b02d9312ad6bc1dcf526d66ce02ed59+10572/gnomad.genomes.v3.1.2.sites.chr22.bcf diff --git a/cwl/comparevcf/change-GT.cwl b/cwl/comparevcf/change-GT.cwl new file mode 100644 index 0000000000..b92666e1d1 --- /dev/null +++ b/cwl/comparevcf/change-GT.cwl @@ -0,0 +1,29 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  sampleid: string
+  suffix: string
+  vcf: File
+  header: File
+  bashscript: File
+outputs:
+  modifiedvcf:
+    type: File
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+arguments:
+  - $(inputs.bashscript)
+  - $(inputs.sampleid)
+  - $(inputs.suffix)
+  - $(inputs.vcf)
+  - $(inputs.header)
diff --git a/cwl/comparevcf/comparevcf-original-lightning-wf.cwl b/cwl/comparevcf/comparevcf-original-lightning-wf.cwl
new file mode 100644
index 0000000000..4b38fba749
--- /dev/null
+++ b/cwl/comparevcf/comparevcf-original-lightning-wf.cwl
@@ -0,0 +1,66 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: Workflow
+requirements:
+  StepInputExpressionRequirement: {}
+inputs:
+  sampleid: string
+  vcf: File
+  nocallbed: File
+  lightningvcf: File
+  sdf: Directory
+  bashscript: File
+  header: File
+
+outputs:
+  modifiedoriginalvcf:
+    type: File
+    outputSource: change-GT_original/modifiedvcf
+  modifiedlightningvcf:
+    type: File
+    outputSource: change-GT_lightning/modifiedvcf
+  evaldir:
+    type: Directory
+    outputSource: rtg-vcfeval/evaldir
+
+steps:
+  rtg-vcffilter:
+    run: ../imputation/rtg-vcffilter.cwl
+    in:
+      sample: sampleid
+      vcf: vcf
+      excludebed: nocallbed
+    out: [filteredvcf]
+
+  change-GT_original:
+    run: change-GT.cwl
+    in:
+      sampleid: sampleid
+      suffix:
+        valueFrom: "original"
+      vcf: rtg-vcffilter/filteredvcf
+      header: header
+      bashscript: bashscript
+    out: [modifiedvcf]
+
+  change-GT_lightning:
+    run: change-GT.cwl
+    in:
+      sampleid: sampleid
+      suffix:
+        valueFrom: "lightning"
+      vcf: lightningvcf
+      header: header
+      bashscript: bashscript
+    out: [modifiedvcf]
+
+  rtg-vcfeval:
+    run: rtg-vcfeval.cwl
+    in:
+      baselinevcf: change-GT_original/modifiedvcf
+      callsvcf: change-GT_lightning/modifiedvcf
+      sdf: sdf
+    out: [evaldir]
diff --git a/cwl/comparevcf/rtg-vcfeval.cwl b/cwl/comparevcf/rtg-vcfeval.cwl
new file mode 100644
index 0000000000..9f5dda374e
--- /dev/null
+++ b/cwl/comparevcf/rtg-vcfeval.cwl
@@ -0,0 +1,35 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.2
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+inputs:
+  baselinevcf:
+    type: File
+    secondaryFiles: [.tbi]
+  callsvcf:
+    type: File
+    secondaryFiles: [.tbi]
+  sdf:
+    type: Directory
+outputs:
+  evaldir:
+    type: Directory
+    outputBinding:
+      glob: "eval"
+baseCommand: [rtg, vcfeval]
+arguments:
+  - prefix: "-b"
+    valueFrom: $(inputs.baselinevcf)
+  - prefix: "-c"
+    valueFrom: $(inputs.callsvcf)
+  - prefix: "-t"
+    valueFrom: $(inputs.sdf)
+  - prefix: "-o"
+    valueFrom: "eval"
diff --git a/cwl/comparevcf/src/change-GT.sh b/cwl/comparevcf/src/change-GT.sh
new file mode 100755
index 0000000000..6dfad07f0d
--- /dev/null
+++ b/cwl/comparevcf/src/change-GT.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Usage: change-GT.sh SAMPLEID SUFFIX VCF HEADER -- the shebang must stay on line 1: pipefail and <(...) below need bash.
+set -eo pipefail
+
+sampleid=$1
+suffix=$2
+vcf=$3
+header=$4
+
+cat "$header" <(bgzip -dc "$vcf" | grep -v '^#' | awk '{if ($4 != $5) print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 "\t" $7 "\t" $8 "\tGT\t0/1"}') | bgzip -c > "$sampleid"_"$suffix".vcf.gz
+tabix "$sampleid"_"$suffix".vcf.gz
diff --git a/cwl/comparevcf/src/concatenate.sh b/cwl/comparevcf/src/concatenate.sh
new file mode 100755
index 0000000000..94ae743efe
--- /dev/null
+++ b/cwl/comparevcf/src/concatenate.sh
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +#!/bin/bash + +set -e +set -o pipefail + +lightningvcfdir="$1" + +chrs=`seq 22` +chrs+=("X" "Y" "M") + +for chr in ${chrs[@]}; do + vcf=`ls $lightningvcfdir/*.chr$chr.*` + egrep -v ^# $vcf +done diff --git a/cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..e26b70e333 --- /dev/null +++ b/cwl/comparevcf/yml/comparevcf-original-lightning-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,23 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-PRHS-PR000971-BL-COL-47620BL1" +vcf: + class: File + location: keep:ebd528706c3192b77e56ebe93361194e+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz +nocallbed: + class: File + location: keep:ebd528706c3192b77e56ebe93361194e+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed +lightningvcf: + class: File + location: keep:4626bd8ca6b3ed8f1ef32dfd46815505+378/GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz +sdf: + class: Directory + location: keep:88c64c60daf829f30187fbcb423a46fc+1355/hg38 +bashscript: + class: File + location: ../src/change-GT.sh +header: + class: File + location: keep:73394b577d9109cd266f5facfbd1e000+54/header diff --git a/cwl/gvcf2fasta/append-sampleid.cwl b/cwl/gvcf2fasta/append-sampleid.cwl new file mode 100644 index 0000000000..f8d8193362 --- /dev/null +++ b/cwl/gvcf2fasta/append-sampleid.cwl @@ -0,0 +1,18 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: ExpressionTool +inputs: + sampleid: string + suffix: string +outputs: + appendedsampleid: string +requirements: + InlineJavascriptRequirement: {} +expression: | + ${ + var appendedsampleid = inputs.sampleid + inputs.suffix; + return {"appendedsampleid": appendedsampleid}; + } diff --git a/cwl/gvcf2fasta/bcftools-consensus.cwl b/cwl/gvcf2fasta/bcftools-consensus.cwl new file mode 100644 index 0000000000..8362b9ec48 --- /dev/null +++ b/cwl/gvcf2fasta/bcftools-consensus.cwl @@ -0,0 +1,44 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +label: Convert VCF to FASTA with bcftools consensus +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sampleid: + type: string + label: sample ID + vcf: + type: File + label: Input VCF + secondaryFiles: [.tbi] + ref: + type: File + label: Reference FASTA + mask: + type: File + label: Mask BED region where FASTA sequence is filled with 'N' + bashscript: + type: File + label: Script to run bcftools consensus + default: + class: File + location: src/bcftools-consensus.sh +outputs: + fas: + type: File[] + label: Output FASTAs + outputBinding: + glob: "*fa.gz" +arguments: + - $(inputs.bashscript) + - $(inputs.sampleid) + - $(inputs.vcf) + - $(inputs.ref) + - $(inputs.mask) diff --git a/cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl new file mode 100644 index 0000000000..5ddd3f3bf1 --- /dev/null +++ b/cwl/gvcf2fasta/concat-get_bed_varonlyvcf.cwl @@ -0,0 +1,52 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +label: Concatenate and get no call BED and variant only VCF from gVCF +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 + outdirMin: 40000 +inputs: + sampleid: + type: string + label: Sample ID + splitvcfdir: + type: Directory + label: Input directory of split gVCFs + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + bashscript: + type: File + label: Script to untar and concatenate vcf tar ball + default: + class: File + location: src/concat-get_bed_varonlyvcf.sh +outputs: + nocallbed: + type: File + label: No call BED of gVCF + outputBinding: + glob: "*_nocall.bed" + varonlyvcf: + type: File + label: Variant only VCF + outputBinding: + glob: "*_varonly.vcf.gz" + secondaryFiles: [.tbi] +arguments: + - $(inputs.bashscript) + - $(inputs.sampleid) + - $(inputs.splitvcfdir) + - $(inputs.gqcutoff) + - $(inputs.genomebed) diff --git a/cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl new file mode 100644 index 0000000000..7098c09cf2 --- /dev/null +++ b/cwl/gvcf2fasta/fixvcf-get_bed_varonlyvcf.cwl @@ -0,0 +1,49 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +label: Fix gVCF and get no call BED and variant only VCF from gVCF +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 15000 +inputs: + bashscript: + type: File + label: Bash script + default: + class: File + location: src/fixvcf-get_bed_varonlyvcf.sh + sampleid: + type: string + label: Sample ID + vcf: + type: File + label: Input gVCF + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED +outputs: + nocallbed: + type: File + label: No call BED of gVCF + outputBinding: + glob: "*_nocall.bed" + varonlyvcf: + type: File + label: Variant only VCF + outputBinding: + glob: "*_varonly.vcf.gz" + secondaryFiles: [.tbi] +arguments: + - $(inputs.bashscript) + - $(inputs.sampleid) + - $(inputs.vcf) + - $(inputs.gqcutoff) + - $(inputs.genomebed) diff --git a/cwl/gvcf2fasta/get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/get_bed_varonlyvcf.cwl new file mode 100644 index 0000000000..b1c9aa861c --- /dev/null +++ b/cwl/gvcf2fasta/get_bed_varonlyvcf.cwl @@ -0,0 +1,80 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +label: Get no call BED and variant only VCF from gVCF +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 + outdirMin: 40000 +inputs: + sampleid: + type: string + label: Sample ID + vcf: + type: File + label: Input gVCF + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED +outputs: + nocallbed: + type: File + label: No call BED of gVCF + outputBinding: + glob: "*_nocall.bed" + varonlyvcf: + type: File + label: Variant only VCF + outputBinding: + glob: "*_varonly.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: /gvcf_regions/gvcf_regions.py +arguments: + - prefix: "--min_GQ" + valueFrom: $(inputs.gqcutoff) + - $(inputs.vcf) + - shellQuote: False + valueFrom: ">" + - $(inputs.sampleid).bed + - shellQuote: False + valueFrom: "&&" + - "bedtools" + - "subtract" + - prefix: "-a" + valueFrom: $(inputs.genomebed) + - prefix: "-b" + valueFrom: $(inputs.sampleid).bed + - shellQuote: False + valueFrom: ">" + - $(inputs.sampleid)_nocall.bed + - shellQuote: False + valueFrom: "&&" + - "bgzip" + - "-dc" + - $(inputs.vcf) + - shellQuote: False + valueFrom: "|" + - "grep" + - "-v" + - "END=" + - shellQuote: False + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: False + valueFrom: ">" + - $(inputs.sampleid)_varonly.vcf.gz + - shellQuote: False + valueFrom: "&&" + - "tabix" + - $(inputs.sampleid)_varonly.vcf.gz diff --git a/cwl/gvcf2fasta/getfiles.cwl b/cwl/gvcf2fasta/getfiles.cwl new file mode 100644 index 0000000000..3b9e6a619c --- /dev/null +++ b/cwl/gvcf2fasta/getfiles.cwl @@ -0,0 +1,37 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: ExpressionTool +label: Create list of VCFs and sample names +hints: + LoadListingRequirement: + loadListing: shallow_listing +inputs: + dir: + type: Directory + label: Input directory of VCFs +outputs: + vcfs: + type: File[] + label: Output VCFs + samples: + type: string[] + label: Sample names of VCFs +requirements: + InlineJavascriptRequirement: {} +expression: | + ${ + var vcfs = []; + var samples = []; + for (var i = 0; i < inputs.dir.listing.length; i++) { + var file = inputs.dir.listing[i]; + if (file.nameext == ".gz") { + vcfs.push(file); + var sample = file.basename.split(".").slice(0, -2).join("."); + samples.push(sample); + } + } + return {"vcfs": vcfs, "samples": samples}; + } diff --git a/cwl/gvcf2fasta/gvcf2fasta-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta-wf.cwl new file mode 100644 index 0000000000..daaf1722b6 --- /dev/null +++ b/cwl/gvcf2fasta/gvcf2fasta-wf.cwl @@ -0,0 +1,62 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  get_bed_varonlyvcf:
+    run: get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcf: vcf
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    # Not scattered: bcftools-consensus.cwl has no "haplotype" input; its script writes both haplotypes itself.
+    in:
+      sampleid: sampleid
+      vcf: get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      # The "haplotypes" workflow input is kept for caller compatibility but is no longer wired to this step.
+      mask: get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl
new file mode 100644
index 0000000000..19754587ff
--- /dev/null
+++ b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-imputation-wf.cwl
@@ -0,0 +1,78 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Impute gVCF and convert to FASTA for gVCF with NON_REF
+requirements:
+  ScatterFeatureRequirement: {}
+  SubworkflowFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcf:
+    type: File
+    label: Input gVCF
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+  chrs:
+    type: string[]
+  refsdir: Directory
+  mapsdir: Directory
+  panelnocallbed: File
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  fixvcf-get_bed_varonlyvcf:
+    run: fixvcf-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcf: vcf
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  imputation-wf:
+    run: ../imputation/imputation-wf.cwl
+    in:
+      sample: sampleid
+      chrs: chrs
+      refsdir: refsdir
+      mapsdir: mapsdir
+      vcf: fixvcf-get_bed_varonlyvcf/varonlyvcf
+      nocallbed: fixvcf-get_bed_varonlyvcf/nocallbed
+      panelnocallbed: panelnocallbed
+    out: [phasedimputedvcf, phasedimputednocallbed]
+
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    # Not scattered: bcftools-consensus.cwl has no "haplotype" input; its script writes both haplotypes itself.
+    in:
+      sampleid: sampleid
+      vcf: imputation-wf/phasedimputedvcf
+      ref: ref
+      # The "haplotypes" workflow input is kept for caller compatibility but is no longer wired to this step.
+      mask: imputation-wf/phasedimputednocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl
new file mode 100644
index 0000000000..5128e9bcee
--- /dev/null
+++ b/cwl/gvcf2fasta/gvcf2fasta_nonrefvcf-wf.cwl
@@ -0,0 +1,54 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +label: Convert gVCF to FASTA for gVCF with NON_REF +requirements: + ScatterFeatureRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + +inputs: + sampleid: + type: string + label: Sample ID + vcf: + type: File + label: Input gVCF + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + +outputs: + fas: + type: File[] + label: Output pair of FASTAs + outputSource: bcftools-consensus/fas + +steps: + fixvcf-get_bed_varonlyvcf: + run: fixvcf-get_bed_varonlyvcf.cwl + in: + sampleid: sampleid + vcf: vcf + gqcutoff: gqcutoff + genomebed: genomebed + out: [nocallbed, varonlyvcf] + + bcftools-consensus: + run: bcftools-consensus.cwl + in: + sampleid: sampleid + vcf: fixvcf-get_bed_varonlyvcf/varonlyvcf + ref: ref + mask: fixvcf-get_bed_varonlyvcf/nocallbed + out: [fas] diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl new file mode 100644 index 0000000000..08728ca9a4 --- /dev/null +++ b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-imputation-wf.cwl @@ -0,0 +1,82 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +label: Impute gVCF and convert to FASTA for gVCF split by chromosome +requirements: + SubworkflowFeatureRequirement: {} + StepInputExpressionRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + +inputs: + sampleid: + type: string + label: Sample ID + splitvcfdir: + type: Directory + label: Input directory of split gVCFs + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + chrs: string[] + refsdir: Directory + mapsdir: Directory + panelnocallbed: File + panelcallbed: File + +outputs: + fas: + type: File[] + label: Output pair of FASTAs + outputSource: bcftools-consensus/fas + +steps: + concat-get_bed_varonlyvcf: + run: concat-get_bed_varonlyvcf.cwl + in: + sampleid: sampleid + splitvcfdir: splitvcfdir + gqcutoff: gqcutoff + genomebed: genomebed + out: [nocallbed, varonlyvcf] + + imputation-wf: + run: ../imputation/imputation-wf.cwl + in: + sample: sampleid + chrs: chrs + refsdir: refsdir + mapsdir: mapsdir + vcf: concat-get_bed_varonlyvcf/varonlyvcf + nocallbed: concat-get_bed_varonlyvcf/nocallbed + panelnocallbed: panelnocallbed + panelcallbed: panelcallbed + genomebed: genomebed + out: [phasedimputedvcf, phasedimputednocallbed] + + append-sampleid: + run: append-sampleid.cwl + in: + sampleid: sampleid + suffix: + valueFrom: "_phased_imputed" + out: [appendedsampleid] + + bcftools-consensus: + run: bcftools-consensus.cwl + in: + sampleid: append-sampleid/appendedsampleid + vcf: imputation-wf/phasedimputedvcf + ref: ref + mask: imputation-wf/phasedimputednocallbed + out: [fas] diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl new file mode 100644 index 0000000000..3e9d7217d9 --- /dev/null +++ b/cwl/gvcf2fasta/gvcf2fasta_splitvcf-wf.cwl @@ -0,0 +1,56 @@ +# Copyright (C) The Lightning Authors. 
All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +label: Convert gVCF to FASTA for gVCF split by chromosome +requirements: + ScatterFeatureRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 + +inputs: + sampleid: + type: string + label: Sample ID + splitvcfdir: + type: Directory + label: Input directory of split gVCFs + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + +outputs: + fas: + type: File[] + label: Output pair of FASTAs + outputSource: bcftools-consensus/fas + +steps: + concat-get_bed_varonlyvcf: + run: concat-get_bed_varonlyvcf.cwl + in: + sampleid: sampleid + splitvcfdir: splitvcfdir + gqcutoff: gqcutoff + genomebed: genomebed + out: [nocallbed, varonlyvcf] + + bcftools-consensus: + run: bcftools-consensus.cwl + in: + sampleid: sampleid + vcf: concat-get_bed_varonlyvcf/varonlyvcf + ref: ref + mask: concat-get_bed_varonlyvcf/nocallbed + out: [fas] diff --git a/cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl b/cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl new file mode 100644 index 0000000000..90722b621d --- /dev/null +++ b/cwl/gvcf2fasta/gvcf2fasta_splitvcftar-wf.cwl @@ -0,0 +1,62 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: Workflow
+label: Convert gVCF to FASTA for gVCF tar split by chromosome
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcftar:
+    type: File
+    label: Input gVCF tar
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  ref:
+    type: File
+    label: Reference FASTA
+  haplotypes:
+    type: int[]
+    label: Haplotypes of sample
+    default: [1, 2]
+
+outputs:
+  fas:
+    type: File[]
+    label: Output pair of FASTAs
+    outputSource: bcftools-consensus/fas
+
+steps:
+  untar-concat-get_bed_varonlyvcf:
+    run: untar-concat-get_bed_varonlyvcf.cwl
+    in:
+      sampleid: sampleid
+      vcftar: vcftar
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+    out: [nocallbed, varonlyvcf]
+
+  bcftools-consensus:
+    run: bcftools-consensus.cwl
+    # Not scattered: bcftools-consensus.cwl has no "haplotype" input; its script writes both haplotypes itself.
+    in:
+      sampleid: sampleid
+      vcf: untar-concat-get_bed_varonlyvcf/varonlyvcf
+      ref: ref
+      # The "haplotypes" workflow input is kept for caller compatibility but is no longer wired to this step.
+      mask: untar-concat-get_bed_varonlyvcf/nocallbed
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl
new file mode 100644
index 0000000000..8309aa3169
--- /dev/null
+++ b/cwl/gvcf2fasta/scatter-gvcf2fasta-wf.cwl
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.1 +class: Workflow +label: Scatter to Convert gVCF to FASTA +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + arv:IntermediateOutput: + outputTTL: 604800 + +inputs: + vcfsdir: + type: Directory + label: Input directory of VCFs + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + +outputs: + fas: + type: + type: array + items: + type: array + items: File + label: Output pairs of FASTAs + outputSource: gvcf2fasta-wf/fas + +steps: + getfiles: + run: getfiles.cwl + in: + dir: vcfsdir + out: [vcfs, samples] + gvcf2fasta-wf: + run: gvcf2fasta-wf.cwl + scatter: [sampleid, vcf] + scatterMethod: dotproduct + in: + sampleid: getfiles/samples + vcf: getfiles/vcfs + genomebed: genomebed + ref: ref + out: [fas] diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl new file mode 100644 index 0000000000..fcee4d5cf3 --- /dev/null +++ b/cwl/gvcf2fasta/scatter-gvcf2fasta_nonrefvcf-wf.cwl @@ -0,0 +1,57 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.1 +class: Workflow +label: Scatter to Convert gVCF to FASTA with NON_REF +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + arv:IntermediateOutput: + outputTTL: 604800 + +inputs: + sampleids: + type: string[] + label: Sample IDs + vcfs: + type: File[] + label: Input VCFs + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + +outputs: + fas: + type: + type: array + items: + type: array + items: File + label: Output pairs of FASTAs + outputSource: gvcf2fasta_nonrefvcf-wf/fas + +steps: + gvcf2fasta_nonrefvcf-wf: + run: gvcf2fasta_nonrefvcf-wf.cwl + scatter: [sampleid, vcf] + scatterMethod: dotproduct + in: + sampleid: sampleids + vcf: vcfs + gqcutoff: gqcutoff + genomebed: genomebed + ref: ref + out: [fas] diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl new file mode 100644 index 0000000000..5fc067c96a --- /dev/null +++ b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl @@ -0,0 +1,65 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.1 +class: Workflow +label: Scatter to impute gVCF and convert gVCF to FASTA +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + +inputs: + sampleids: + type: string[] + label: Sample IDs + splitvcfdirs: + type: Directory[] + label: Input directory of split gVCFs + gqcutoff: + type: int + label: GQ (Genotype Quality) cutoff for filtering + genomebed: + type: File + label: Whole genome BED + ref: + type: File + label: Reference FASTA + chrs: string[] + refsdir: Directory + mapsdir: Directory + panelnocallbed: File + panelcallbed: File + +outputs: + fas: + type: + type: array + items: + type: array + items: File + label: Output pairs of FASTAs + outputSource: gvcf2fasta_splitvcf-imputation-wf/fas + +steps: + gvcf2fasta_splitvcf-imputation-wf: + run: gvcf2fasta_splitvcf-imputation-wf.cwl + scatter: [sampleid, splitvcfdir] + scatterMethod: dotproduct + in: + sampleid: sampleids + splitvcfdir: splitvcfdirs + gqcutoff: gqcutoff + genomebed: genomebed + ref: ref + chrs: chrs + refsdir: refsdir + mapsdir: mapsdir + panelnocallbed: panelnocallbed + panelcallbed: panelcallbed + out: [fas] diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl new file mode 100644 index 0000000000..b50e26977a --- /dev/null +++ b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcf-wf.cwl @@ -0,0 +1,55 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}  # the only step runs another workflow
+  ScatterFeatureRequirement: {}  # the only step is scattered
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  sampleids:  # scattered; paired by position with splitvcfdirs
+    type: string[]
+    label: Sample IDs
+  splitvcfdirs:  # scattered; paired by position with sampleids
+    type: Directory[]
+    label: Input directory of split gVCFs
+  gqcutoff:  # not scattered: the same value goes to every sub-workflow run
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:  # not scattered
+    type: File
+    label: Whole genome BED
+  ref:  # not scattered
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type:
+      type: array  # outer array: one entry per scattered (sampleid, splitvcfdir) pair
+      items:
+        type: array  # inner array: the `fas` files of one sub-workflow run
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_splitvcf-wf/fas
+
+steps:
+  gvcf2fasta_splitvcf-wf:
+    run: gvcf2fasta_splitvcf-wf.cwl
+    scatter: [sampleid, splitvcfdir]
+    scatterMethod: dotproduct  # sampleids[i] runs with splitvcfdirs[i]; the arrays must have equal length
+    in:
+      sampleid: sampleids
+      splitvcfdir: splitvcfdirs
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl
new file mode 100644
index 0000000000..0899a024e3
--- /dev/null
+++ b/cwl/gvcf2fasta/scatter-gvcf2fasta_splitvcftar-wf.cwl
@@ -0,0 +1,57 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.1
+class: Workflow
+label: Scatter to Convert gVCF to FASTA
+requirements:
+  SubworkflowFeatureRequirement: {}  # the only step runs another workflow
+  ScatterFeatureRequirement: {}  # the only step is scattered
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  arv:IntermediateOutput:
+    outputTTL: 604800  # seconds (= 7 days)
+
+inputs:
+  sampleids:  # scattered; paired by position with vcftars
+    type: string[]
+    label: Sample IDs
+  vcftars:  # scattered; paired by position with sampleids
+    type: File[]
+    label: Input VCF tars
+  gqcutoff:  # not scattered: the same value goes to every sub-workflow run
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:  # not scattered
+    type: File
+    label: Whole genome BED
+  ref:  # not scattered
+    type: File
+    label: Reference FASTA
+
+outputs:
+  fas:
+    type:
+      type: array  # outer array: one entry per scattered (sampleid, vcftar) pair
+      items:
+        type: array  # inner array: the `fas` files of one sub-workflow run
+        items: File
+    label: Output pairs of FASTAs
+    outputSource: gvcf2fasta_splitvcf-wf/fas
+
+steps:
+  gvcf2fasta_splitvcf-wf:  # step id kept as-is so outputSource above stays valid
+    run: gvcf2fasta_splitvcftar-wf.cwl  # the tar variant: this step feeds `vcftar`, which the splitvcf (directory) workflow does not take
+    scatter: [sampleid, vcftar]
+    scatterMethod: dotproduct  # sampleids[i] runs with vcftars[i]; the arrays must have equal length
+    in:
+      sampleid: sampleids
+      vcftar: vcftars
+      gqcutoff: gqcutoff
+      genomebed: genomebed
+      ref: ref
+    out: [fas]
diff --git a/cwl/gvcf2fasta/src/bcftools-consensus.sh b/cwl/gvcf2fasta/src/bcftools-consensus.sh
new file mode 100755
index 0000000000..531a7c6056
--- /dev/null
+++ b/cwl/gvcf2fasta/src/bcftools-consensus.sh
@@ -0,0 +1,18 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+# Usage: bcftools-consensus.sh SAMPLEID VCF REF MASK -- writes SAMPLEID.1.fa.gz and SAMPLEID.2.fa.gz
+set -eo pipefail
+
+sampleid="$1"  # prefix of the output file names
+vcf="$2"       # VCF passed to bcftools consensus
+ref="$3"       # passed as --fasta-ref
+mask="$4"      # passed as --mask
+
+haplotypes=(1 2)  # one consensus FASTA is written per haplotype
+
+for haplotype in "${haplotypes[@]}"; do
+  bcftools consensus --fasta-ref "$ref" --haplotype "$haplotype" --mask "$mask" "$vcf" | bgzip -c > "$sampleid"."$haplotype".fa.gz
+done
diff --git a/cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh b/cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh
new file mode 100755
index 0000000000..43f1a91244
--- /dev/null
+++ b/cwl/gvcf2fasta/src/concat-get_bed_varonlyvcf.sh
@@ -0,0 +1,25 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+# Usage: concat-get_bed_varonlyvcf.sh SAMPLEID SPLITVCFDIR GQCUTOFF GENOMEBED -- writes SAMPLEID_nocall.bed and SAMPLEID_varonly.vcf.gz(.tbi)
+set -eo pipefail
+
+sampleid="$1"     # prefix of the output file names
+splitvcfdir="$2"  # directory holding one *<chrom>.*gz file per chromosome
+gqcutoff="$3"     # passed to gvcf_regions.py as --min_GQ
+genomebed="$4"    # BED the regions BED is subtracted from
+
+chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM)
+splitvcfs=$(for chrom in "${chroms[@]}"; do ls "$splitvcfdir"/*"$chrom"\.*gz; done)  # newline-separated list; expanded unquoted below so it splits into one argument per file
+echo "splitvcfs: ${splitvcfs[@]}"
+# Drop records that still contain "*" or <NON_REF>, then fan the stream out to the regions BED and the variant-only VCF.
+bcftools concat ${splitvcfs[@]} -n | bcftools view --trim-alt-alleles | grep -E -v "\*|<NON_REF>" | tee \
+    >( /gvcf_regions/gvcf_regions.py --min_GQ "$gqcutoff" - > "$sampleid".bed ) \
+    >( awk '{if ($5 != ".") print $0}' | bgzip -c > "$sampleid"_varonly.vcf.gz ) \
+    > /dev/null
+# NOTE(review): bash does not wait for the >( ) writers above; confirm both have finished before their files are read below.
+bedtools subtract -a "$genomebed" -b "$sampleid".bed > "$sampleid"_nocall.bed
+rm "$sampleid".bed
+tabix "$sampleid"_varonly.vcf.gz
diff --git a/cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh b/cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh
new file mode 100755
index 0000000000..2ed5369721
--- /dev/null
+++ b/cwl/gvcf2fasta/src/fixvcf-get_bed_varonlyvcf.sh
@@ -0,0 +1,21 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+# Usage: fixvcf-get_bed_varonlyvcf.sh SAMPLEID VCF GQCUTOFF GENOMEBED -- writes SAMPLEID_nocall.bed and SAMPLEID_varonly.vcf.gz(.tbi)
+set -eo pipefail
+
+sampleid="$1"   # prefix of the output file names
+vcf="$2"        # input VCF read by bcftools view
+gqcutoff="$3"   # passed to gvcf_regions.py as --min_GQ
+genomebed="$4"  # BED the regions BED is subtracted from
+# Drop records that still contain "*" or <NON_REF>, then fan the stream out to the regions BED and the variant-only VCF.
+bcftools view --trim-alt-alleles "$vcf" | grep -E -v "\*|<NON_REF>" | tee \
+    >( /gvcf_regions/gvcf_regions.py --min_GQ "$gqcutoff" - > "$sampleid".bed ) \
+    >( rtg vcffilter -i - -o - --remove-overlapping | awk '{if ($5 != ".") print $0}' | bgzip -c > "$sampleid"_varonly.vcf.gz ) \
+    > /dev/null
+# NOTE(review): bash does not wait for the >( ) writers above; confirm both have finished before their files are read below.
+bedtools subtract -a "$genomebed" -b "$sampleid".bed > "$sampleid"_nocall.bed
+rm "$sampleid".bed
+tabix "$sampleid"_varonly.vcf.gz
diff --git a/cwl/gvcf2fasta/src/untar-concat.sh b/cwl/gvcf2fasta/src/untar-concat.sh
new file mode 100755
index 0000000000..e10d607820
--- /dev/null
+++ b/cwl/gvcf2fasta/src/untar-concat.sh
@@ -0,0 +1,21 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+#!/bin/bash
+# Usage: untar-concat.sh SAMPLEID VCFTAR -- extracts VCFTAR into the working directory and writes SAMPLEID.vcf.gz
+set -eo pipefail
+
+sampleid="$1"  # prefix of the output file name
+vcftar="$2"    # gzip-compressed tarball of per-chromosome VCFs
+
+tar -xzf "$vcftar" -C .
+splitvcfdir=`ls`  # NOTE(review): assumes the extracted directory is the only entry in the working directory -- confirm
+
+chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM)
+splitvcfs=$(for chrom in "${chroms[@]}"; do ls $splitvcfdir/*$chrom\_*gz; done)  # newline-separated list; expanded unquoted below so it splits into one argument per file
+echo "splitvcfs: ${splitvcfs[@]}"
+
+bcftools concat ${splitvcfs[@]} -n -O z -o "$sampleid".vcf.gz
+
+rm -rf $splitvcfdir
diff --git a/cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl b/cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl
new file mode 100644
index 0000000000..fb621e1ac2
--- /dev/null
+++ b/cwl/gvcf2fasta/untar-concat-get_bed_varonlyvcf.cwl
@@ -0,0 +1,97 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.1
+class: CommandLineTool
+label: Untar, concatenate, and get no call BED and variant only VCF from gVCF
+requirements:
+  ShellCommandRequirement: {}  # needed for the unquoted &&, | and > arguments below
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    ramMin: 5000
+    outdirMin: 40000
+inputs:
+  sampleid:
+    type: string
+    label: Sample ID
+  vcftar:
+    type: File
+    label: Input gVCF tar
+  gqcutoff:
+    type: int
+    label: GQ (Genotype Quality) cutoff for filtering
+  genomebed:
+    type: File
+    label: Whole genome BED
+  bashscript:
+    type: File
+    label: Script to untar and concatenate vcf tar ball
+    default:
+      class: File
+      location: src/untar-concat.sh
+outputs:
+  nocallbed:
+    type: File
+    label: No call BED of gVCF
+    outputBinding:
+      glob: "*_nocall.bed"
+  varonlyvcf:
+    type: File
+    label: Variant only VCF
+    outputBinding:
+      glob: "*_varonly.vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: bash
+arguments:
+  - $(inputs.bashscript)  # command 1: bash <script> <sampleid> <vcftar>
+  - $(inputs.sampleid)
+  - $(inputs.vcftar)
+  - shellQuote: False
+    valueFrom: "&&"
+  - "/gvcf_regions/gvcf_regions.py"  # command 2: gvcf_regions.py on <sampleid>.vcf.gz, redirected to <sampleid>.bed
+  - prefix: "--min_GQ"
+    valueFrom: $(inputs.gqcutoff)
+  - $(inputs.sampleid).vcf.gz
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid).bed
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bedtools"  # command 3: genomebed minus <sampleid>.bed, redirected to <sampleid>_nocall.bed
+  - "subtract"
+  - prefix: "-a"
+    valueFrom: $(inputs.genomebed)
+  - prefix: "-b"
+    valueFrom: $(inputs.sampleid).bed
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid)_nocall.bed
+  - shellQuote: False
+    valueFrom: "&&"
+  - "bgzip"  # command 4: decompress, drop lines containing "END=", recompress to <sampleid>_varonly.vcf.gz
+  - "-dc"
+  - $(inputs.sampleid).vcf.gz
+  - shellQuote: False
+    valueFrom: "|"
+  - "grep"
+  - "-v"
+  - "END="
+  - shellQuote: False
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.sampleid)_varonly.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"  # command 5: index the variant-only VCF
+  - $(inputs.sampleid)_varonly.vcf.gz
+  - shellQuote: False
+    valueFrom: "&&"
+  - "rm"  # command 6: remove the two intermediates so only the globbed outputs remain
+  - $(inputs.sampleid).vcf.gz
+  - $(inputs.sampleid).bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml
new file mode 100644
index 0000000000..0be13637da
--- /dev/null
+++ b/cwl/gvcf2fasta/yml/bcftools-consensus-A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.yml
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+sampleid: "A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed"
+vcf:
+  class: File
+  location: keep:827ea468c00a16bf711bd215ea2ce2e6+175/A-IIAA-IA000196-BL-NCR-14AD66938_phased_imputed.vcf.gz
+ref:
+  class: File
+  location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz
+haplotype: 1
+mask:
+  class: File
+  location: keep:17670aabfa66091b19c8c2fbfb35cbf9+145/A-IIAA-IA000196-BL-NCR-14AD66938_intersect.bed
diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml
new file mode 100644
index 0000000000..0656c7ca8e
--- /dev/null
+++ b/cwl/gvcf2fasta/yml/bcftools-consensus-test-newadni.yml
@@ -0,0 +1,15 @@
+# Copyright (C) The Lightning Authors.
All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC000711-BL-COL-39141BL1" +vcf: + class: File + location: keep:97686398b32cd680c674e0e5174078e3+6243/A-WCAP-WC000711-BL-COL-39141BL1_varonly.vcf.gz +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +haplotype: 1 +mask: + class: File + location: keep:97686398b32cd680c674e0e5174078e3+6243/A-WCAP-WC000711-BL-COL-39141BL1_nocall.bed diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml new file mode 100644 index 0000000000..7d83717f76 --- /dev/null +++ b/cwl/gvcf2fasta/yml/bcftools-consensus-test-varonly.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "HG00551.haplotypeCalls.er.raw" +vcf: + class: File + location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_varonly.vcf.gz +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +haplotype: 1 +mask: + class: File + location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_nocall.bed diff --git a/cwl/gvcf2fasta/yml/bcftools-consensus-test.yml b/cwl/gvcf2fasta/yml/bcftools-consensus-test.yml new file mode 100644 index 0000000000..61ccca3af8 --- /dev/null +++ b/cwl/gvcf2fasta/yml/bcftools-consensus-test.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "HG00551.haplotypeCalls.er.raw" +vcf: + class: File + location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +haplotype: 1 +mask: + class: File + location: keep:ae8975b43e2b9f1ab9e3d09395f3c0f6+604/HG00551.haplotypeCalls.er.raw_nocall.bed diff --git a/cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml new file mode 100644 index 0000000000..ea8f665520 --- /dev/null +++ b/cwl/gvcf2fasta/yml/concat-get_bed_varonlyvcf-A-WCAP-WC001544-BL-COL-49161BL1.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC001544-BL-COL-49161BL1" +splitvcfdir: + class: Directory + location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893 +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed diff --git a/cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml b/cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml new file mode 100644 index 0000000000..1137060601 --- /dev/null +++ b/cwl/gvcf2fasta/yml/get_bed_varonlyvcf-test.yml @@ -0,0 +1,11 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "HG00551.haplotypeCalls.er.raw" +vcf: + class: File + location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml new file mode 100644 index 0000000000..f289b3c0ef --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test-newadni.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. 
All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC000711-BL-COL-39141BL1" +vcf: + class: File + location: keep:3eead47d28a9cfb1fb1dc4fde1d38af0+4961919/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.1.allchr_g.vcf.gz +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml new file mode 100644 index 0000000000..0e9645ef6d --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "HG00551.haplotypeCalls.er.raw" +vcf: + class: File + location: keep:3c41c90c022a9307ffa905426b04096b+5179901/HG00551.haplotypeCalls.er.raw.vcf.gz +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml new file mode 100644 index 0000000000..50da8515e5 --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta-wf-test37.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "LP6005441-DNA_A01" +vcf: + class: File + location: keep:865a18acdb12f8d41b76a1bff76b2211+89722/LP6005441-DNA_A01.vcf.gz +gqcutoff: 20 +genomebed: + class: File + location: keep:1310f01a495c81d3c7d3154f1a73527f+63/human_g1k_v37.bed +ref: + class: File + location: keep:5a42cfaddd3a9cfc4fac89b3fe73c6f6+751/human_g1k_v37.fasta.gz diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..d3ecbb1267 --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-imputation-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,25 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +sampleid: "A-PRHS-PR000971-BL-COL-47620BL1" +vcf: + class: File + location: keep:4cba97691d17f8542116c74ca6c1d89d+2045/A-PRHS-PR000971-BL-COL-47620BL1_vcpa1.1.allchr_g.vcf.gz +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..22aaa494ee --- /dev/null +++ 
b/cwl/gvcf2fasta/yml/gvcf2fasta_nonrefvcf-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +sampleid: A-PRHS-PR000971-BL-COL-47620BL1 +vcf: + class: File + location: keep:4cba97691d17f8542116c74ca6c1d89d+2045/A-PRHS-PR000971-BL-COL-47620BL1_vcpa1.1.allchr_g.vcf.gz diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml new file mode 100644 index 0000000000..0398aa4e38 --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-IIAA-IA000196-BL-NCR-14AD66938.yml @@ -0,0 +1,28 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-IIAA-IA000196-BL-NCR-14AD66938" +splitvcfdir: + class: Directory + location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128 +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed +panelcallbed: + class: File + location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed diff --git 
a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml new file mode 100644 index 0000000000..4063dc516c --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-imputation-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml @@ -0,0 +1,25 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC001544-BL-COL-49161BL1" +splitvcfdir: + class: Directory + location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893 +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml new file mode 100644 index 0000000000..ef0db6afe7 --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcf-wf-A-WCAP-WC001544-BL-COL-49161BL1.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC001544-BL-COL-49161BL1" +splitvcfdir: + class: Directory + location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893 +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml new file mode 100644 index 0000000000..df6431b4ea --- /dev/null +++ b/cwl/gvcf2fasta/yml/gvcf2fasta_splitvcftar-wf-test.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC000711-BL-COL-39141BL1" +vcftar: + class: File + location: keep:9b09803eb9ccd2b31d4db4ce80ed7a03+5003343/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.0_gatk_GRU-IRB-PUB.gvcf.tar.gz +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml new file mode 100644 index 0000000000..1c6cf143e9 --- /dev/null +++ b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-16gvcf.yml @@ -0,0 +1,13 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcfsdir: + class: Directory + location: keep:18966cf8fb85d5aa2b30f5773f02b93e+73402 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml new file mode 100644 index 0000000000..df78a85f60 --- /dev/null +++ b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta-wf-test.yml @@ -0,0 +1,13 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfsdir: + class: Directory + location: keep:bde5e9a92ee9cf91fe95260c57386731+10722 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz diff --git a/cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml new file mode 100644 index 0000000000..9ad600fe88 --- /dev/null +++ b/cwl/gvcf2fasta/yml/scatter-gvcf2fasta_splitvcf-imputation-wf-100test.yml @@ -0,0 +1,326 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed +panelcallbed: + class: File + location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed +sampleids: + - "A-IIAA-IA000604-BL-NCR-14AD67907" + - "A-IIAA-IA000369-BL-NCR-14AD67573" + - "A-IIAA-IA000196-BL-NCR-14AD66938" + - "A-IIAA-IA000194-BL-NCR-14AD66933" + - "A-IIAA-IA000009-BL-NCR-14AD67102" + - "A-WCAP-WC001736-BL-COL-62260BL1" + - "A-WCAP-WC001725-BL-COL-62376BL1" + - "A-WCAP-WC001708-BL-COL-50951BL1" + - "A-WCAP-WC001695-BL-COL-46967BL1" + - "A-WCAP-WC001710-BL-COL-40530BL1" + - "A-WCAP-WC001720-BL-COL-62328BL1" + - "A-WCAP-WC001704-BL-COL-48409BL1" + - "A-WCAP-WC001702-BL-COL-45946BL1" + - "A-WCAP-WC001697-BL-COL-46951BL1" + - "A-WCAP-WC001699-BL-COL-55120BL1" + - "A-WCAP-WC001700-BL-COL-57684BL1" + - "A-WCAP-WC001696-BL-COL-50506BL1" + - "A-WCAP-WC001693-BL-COL-48336BL1" + - "A-WCAP-WC001683-BL-COL-49188BL1" + - "A-WCAP-WC001687-BL-COL-48252BL1" + - "A-WCAP-WC001694-BL-COL-45207BL1" + - "A-WCAP-WC001686-BL-COL-50549BL1" + - "A-WCAP-WC001691-BL-COL-46031BL1" + - "A-WCAP-WC001688-BL-COL-50977BL1" + - "A-WCAP-WC001684-BL-COL-47006BL1" + - "A-WCAP-WC001681-BL-COL-57657BL1" + - "A-WCAP-WC001682-BL-COL-47560BL1" + - "A-WCAP-WC001679-BL-COL-49250BL1" + - "A-WCAP-WC001672-BL-COL-45187BL1" + - "A-WCAP-WC001673-BL-COL-48284BL1" + - 
"A-WCAP-WC001675-BL-COL-48314BL1" + - "A-WCAP-WC001677-BL-COL-46990BL1" + - "A-WCAP-WC001671-BL-COL-50527BL1" + - "A-WCAP-WC001669-BL-COL-57703BL1" + - "A-WCAP-WC001667-BL-COL-46970BL1" + - "A-WCAP-WC001670-BL-COL-50445BL1" + - "A-WCAP-WC001654-BL-COL-46029BL1" + - "A-WCAP-WC001664-BL-COL-39293BL1" + - "A-WCAP-WC001665-BL-COL-47583BL1" + - "A-WCAP-WC001653-BL-COL-48362BL1" + - "A-WCAP-WC001657-BL-COL-65820BL1" + - "A-WCAP-WC001658-BL-COL-50384BL1" + - "A-WCAP-WC001666-BL-COL-48390BL1" + - "A-WCAP-WC001656-BL-COL-57746BL1" + - "A-WCAP-WC001638-BL-COL-47008BL1" + - "A-WCAP-WC001639-BL-COL-41818BL1" + - "A-WCAP-WC001646-BL-COL-45962BL1" + - "A-WCAP-WC001652-BL-COL-47594BL1" + - "A-WCAP-WC001640-BL-COL-47537BL1" + - "A-WCAP-WC001629-BL-COL-69602BL1" + - "A-WCAP-WC001641-BL-COL-46986BL1" + - "A-WCAP-WC001645-BL-COL-41786BL1" + - "A-WCAP-WC001636-BL-COL-47553BL1" + - "A-WCAP-WC001634-BL-COL-50462BL1" + - "A-WCAP-WC001623-BL-COL-56498BL1" + - "A-WCAP-WC001627-BL-COL-56607BL1" + - "A-WCAP-WC001626-BL-COL-48292BL1" + - "A-WCAP-WC001621-BL-COL-48345BL1" + - "A-WCAP-WC001618-BL-COL-50400BL1" + - "A-WCAP-WC001622-BL-COL-50921BL1" + - "A-WCAP-WC001616-BL-COL-56626BL1" + - "A-WCAP-WC001617-BL-COL-45961BL1" + - "A-WCAP-WC001612-BL-COL-49158BL1" + - "A-WCAP-WC001608-BL-COL-48342BL1" + - "A-WCAP-WC001611-BL-COL-48295BL1" + - "A-WCAP-WC001605-BL-COL-45954BL1" + - "A-WCAP-WC001594-BL-COL-40429BL1" + - "A-WCAP-WC001606-BL-COL-48422BL1" + - "A-WCAP-WC001595-BL-COL-45200BL1" + - "A-WCAP-WC001602-BL-COL-45226BL1" + - "A-WCAP-WC001604-BL-COL-56480BL1" + - "A-WCAP-WC001598-BL-COL-49123BL1" + - "A-WCAP-WC001603-BL-COL-56489BL1" + - "A-WCAP-WC001587-BL-COL-45975BL1" + - "A-WCAP-WC001593-BL-COL-45249BL1" + - "A-WCAP-WC001589-BL-COL-55018BL1" + - "A-WCAP-WC001577-BL-COL-48318BL1" + - "A-WCAP-WC001586-BL-COL-45943BL1" + - "A-WCAP-WC001585-BL-COL-45991BL1" + - "A-WCAP-WC001584-BL-COL-45976BL1" + - "A-WCAP-WC001581-BL-COL-56483BL1" + - "A-WCAP-WC001582-BL-COL-47610BL1" + - 
"A-WCAP-WC001572-BL-COL-45937BL1" + - "A-WCAP-WC001574-BL-COL-56642BL1" + - "A-WCAP-WC001567-BL-COL-45235BL1" + - "A-WCAP-WC001573-BL-COL-46034BL1" + - "A-WCAP-WC001568-BL-COL-50455BL1" + - "A-WCAP-WC001566-BL-COL-56566BL1" + - "A-WCAP-WC001559-BL-COL-49283BL1" + - "A-WCAP-WC001556-BL-COL-45259BL1" + - "A-WCAP-WC001564-BL-COL-39158BL1" + - "A-WCAP-WC001552-BL-COL-55020BL1" + - "A-WCAP-WC001563-BL-COL-41119BL1" + - "A-WCAP-WC001558-BL-COL-48277BL1" + - "A-WCAP-WC001545-BL-COL-45981BL1" + - "A-WCAP-WC001547-BL-COL-57785BL1" + - "A-WCAP-WC001543-BL-COL-45240BL1" + - "A-WCAP-WC001542-BL-COL-40920BL1" + - "A-WCAP-WC001541-BL-COL-41762BL1" + - "A-WCAP-WC001544-BL-COL-49161BL1" +splitvcfdirs: + - class: Directory + location: keep:47e5a217867e6a69efe10378541b38e7+7816 + - class: Directory + location: keep:00d1dac7ab3769e1b600129643b3f7bc+8361 + - class: Directory + location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128 + - class: Directory + location: keep:24eb9ef3c45fae2569077d429f121bbd+8778 + - class: Directory + location: keep:00a065a8a5e71acfd083172de3a86d4b+6930 + - class: Directory + location: keep:c751a4f4778156180605ebc04bad6cc1+5894 + - class: Directory + location: keep:7f94ff84914a9d0d873f5313e7124c55+5642 + - class: Directory + location: keep:987b13f6ad4974b796cc2ec8f270d19d+6356 + - class: Directory + location: keep:2284173a8cbcf3e950a41d385069d327+7622 + - class: Directory + location: keep:db084c1516d23a4c6c746105d58a08f3+6356 + - class: Directory + location: keep:a823e23a5f0822a981fa720a2bcb2287+6062 + - class: Directory + location: keep:c7bd505077ecd399b59176f8d5bc34aa+5808 + - class: Directory + location: keep:a590e7f19b8f19f1babdd8b7795e1c20+5681 + - class: Directory + location: keep:1e1232a1a1df7b39605a4630464ef864+5517 + - class: Directory + location: keep:f5c57ac585ab060ca4f9af439acd3e78+5682 + - class: Directory + location: keep:714619e20a6cb2220fae47d5519d2b9f+5093 + - class: Directory + location: keep:d572b35e03d4a2545e4c917506738917+5383 + - class: 
Directory + location: keep:cca9ddebe18cdde474f9b9ceb33c0247+5768 + - class: Directory + location: keep:55a3599a3b6adac75c19772f2fd0e080+5683 + - class: Directory + location: keep:f4725fe4ae3032ff1f6852701aef182b+5176 + - class: Directory + location: keep:a046387c19b4b4e92d3728cfb5c2239d+5468 + - class: Directory + location: keep:ca6aabbfe01db391a27c755f413f7e24+5010 + - class: Directory + location: keep:f880cb3c4fb2dee626a7afdce73f6b35+5051 + - class: Directory + location: keep:213d15a3e1e2642b593449943d54f940+5851 + - class: Directory + location: keep:e5df0c86fe692bdc234acdea89c09735+5512 + - class: Directory + location: keep:3dcd72a55d24501eb4eeab04e735bc9e+6058 + - class: Directory + location: keep:a56ad75332c6e504237d20f17006b306+5558 + - class: Directory + location: keep:15fae2b96e4f4c28e9473755ade2beb0+5515 + - class: Directory + location: keep:7be46744f59209dd25710bfa8bfb59d0+6527 + - class: Directory + location: keep:bd0a1e4399598a231a8e78c475e94e22+5769 + - class: Directory + location: keep:209ab994cae2c50c0f0f409cecfe0af4+5343 + - class: Directory + location: keep:507185affe0707d6eb0269008717be79+4756 + - class: Directory + location: keep:57af40c026feaf3da5ab7e095caeeae1+5725 + - class: Directory + location: keep:3f515cba6b180bb7aa151ab05ce43270+5936 + - class: Directory + location: keep:15350af160d548a437e45a1ca0432363+7786 + - class: Directory + location: keep:dd76f2ee8fc0579b64e685d30e5d9922+5427 + - class: Directory + location: keep:fde51804f15e0fea5a6bb7be37e1d262+6734 + - class: Directory + location: keep:141fb2f192c4e1efdb6e373543022ab7+6568 + - class: Directory + location: keep:2a96bea877d4a9cf25753c1298f34e58+6612 + - class: Directory + location: keep:6edb216921b036a20cdd32583f2970b6+6022 + - class: Directory + location: keep:f26422b6b05b8bb1e9f486e5c09051b2+5640 + - class: Directory + location: keep:4ed2571eace3eb2963867ca835862646+6061 + - class: Directory + location: keep:a5bf8756702a8f79723d3b134a8c6cff+5725 + - class: Directory + location: 
keep:05fe61865950248bc6ed9f732426f42a+5385 + - class: Directory + location: keep:e48587348ce4b238ad6594f3a862fca7+7832 + - class: Directory + location: keep:802ba4f4f4a04e53f9e3120f5a871fd9+6902 + - class: Directory + location: keep:22b66f6b397d2e051740d0b3b896c13c+5892 + - class: Directory + location: keep:a6666076ab9bf6963e52d82206b17581+5429 + - class: Directory + location: keep:ed99ae4b5448d5e998444cdc2d288c4f+5978 + - class: Directory + location: keep:31e562eecd2259dc0404f83f138eb13a+6814 + - class: Directory + location: keep:4b247a882829c85824ca49309e51f8b3+5470 + - class: Directory + location: keep:4a3a45a029be557ceb627050b278404c+5097 + - class: Directory + location: keep:96576082494eacc33d34891643247e16+5639 + - class: Directory + location: keep:7849fd811c58ff9797956ca88885c072+5134 + - class: Directory + location: keep:8281291d46712c4dee2929be01a8459d+5935 + - class: Directory + location: keep:afdc5af01594e0e0372ab17287575db3+5427 + - class: Directory + location: keep:aa783333788f5dd554055074ed4cb5ab+5384 + - class: Directory + location: keep:21c8d76ef6ee9950cd2bb641b226a57b+5934 + - class: Directory + location: keep:57a4338099666f13ab7cd05bad7c67c8+5892 + - class: Directory + location: keep:04a1b83e91062b8c43eb3d470aaa6c64+5051 + - class: Directory + location: keep:ad31d97aa3d355a666fe07da625f3994+6482 + - class: Directory + location: keep:98acdba4fb52ac698eaad7449660227e+5517 + - class: Directory + location: keep:03b7cd1daf28b6dcc913a45342f37c96+6482 + - class: Directory + location: keep:60c42c1bad792d1d1ebc4c40420e8032+7030 + - class: Directory + location: keep:3e7ef8e480273a67e223db2842d38e43+5513 + - class: Directory + location: keep:b8ce59383ab761c76b35c91773409bf8+6692 + - class: Directory + location: keep:2c7a882d3f13a0299baf866dc83872d7+7029 + - class: Directory + location: keep:8740baf8f9730eff6d40a918a4c20f90+5384 + - class: Directory + location: keep:8e63209016939215a48def1b350dee0a+6650 + - class: Directory + location: 
keep:cee36503dcd257a70630396eab59e6c1+6481 + - class: Directory + location: keep:d17a17d9fb4d05cbcadde06b99fae806+5430 + - class: Directory + location: keep:c7f9b800e363290047d61904cc872c3e+5769 + - class: Directory + location: keep:2dee32c1ab8b1fcc264458ae2609a18c+4887 + - class: Directory + location: keep:9e18a67bc403b4d51ee4f556c597b689+5932 + - class: Directory + location: keep:7bf3c8ef6a8ed7b4563569e1e4b85154+5051 + - class: Directory + location: keep:cf3ca53fe3fe7955cd8993c9f2bdd24d+5682 + - class: Directory + location: keep:8716d3eff15d14b0a072e481698fc715+6485 + - class: Directory + location: keep:14b53d263217e13caf5755c66b8f9232+4884 + - class: Directory + location: keep:df79fb7025f8706f20ed678e1916fd15+5424 + - class: Directory + location: keep:e90c5745c169fc9f945463fcd065cdfd+4842 + - class: Directory + location: keep:2e65619e3e557ae435b8b24cad86ce0e+6440 + - class: Directory + location: keep:a67a94826b54eb78a4c3e582233579f8+6482 + - class: Directory + location: keep:c184cd5457f7026ba8112492e3741036+6397 + - class: Directory + location: keep:d5c5c607fd49300d94ecd7de39592eff+5177 + - class: Directory + location: keep:3c7bf8b576bce2567590f90362ef0edf+5009 + - class: Directory + location: keep:f83f53f0698ad52a1cb9b265a451eba8+5051 + - class: Directory + location: keep:7680642fc1c8741d6657ca8b30675661+5013 + - class: Directory + location: keep:55e6c7dc3edefb6625ee47ddfbe86f10+6648 + - class: Directory + location: keep:0abaab02ff171c7a3d283ec54c845498+5515 + - class: Directory + location: keep:dc949aef3a7959dc5259aa9e5caff0ac+6525 + - class: Directory + location: keep:52f5abd360f99207bc7266f8f3b4e2ba+5512 + - class: Directory + location: keep:3c0cb444429a6cf0be2ffc6e0359a345+6524 + - class: Directory + location: keep:c6d33856d6620ed3b7dfcfaa9e4fa987+5343 + - class: Directory + location: keep:84dc794f57a9fbf51f92a9add486702b+5341 + - class: Directory + location: keep:f1b7173ab79d41035044f8ffa7ea5595+7956 + - class: Directory + location: 
keep:7362951a09e4177e83af2ce779700ab8+6188 + - class: Directory + location: keep:5f45b74d16fc04376ff3a16d30518ebb+5935 + - class: Directory + location: keep:0a35ed7284d0851c7a2698026837c604+6900 + - class: Directory + location: keep:0f3da67b2ad0df2886e7fe1e1c1b5338+6777 + - class: Directory + location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893 diff --git a/cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml b/cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml new file mode 100644 index 0000000000..5d893fba52 --- /dev/null +++ b/cwl/gvcf2fasta/yml/untar-concat-get_bed_varonlyvcf-test.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sampleid: "A-WCAP-WC000711-BL-COL-39141BL1" +vcftar: + class: File + location: keep:9b09803eb9ccd2b31d4db4ce80ed7a03+5003343/A-WCAP-WC000711-BL-COL-39141BL1_vcpa1.0_gatk_GRU-IRB-PUB.gvcf.tar.gz +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed diff --git a/cwl/imputation/bcftools-concat.cwl b/cwl/imputation/bcftools-concat.cwl new file mode 100644 index 0000000000..88092506c2 --- /dev/null +++ b/cwl/imputation/bcftools-concat.cwl @@ -0,0 +1,36 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: beagle5.4 + ResourceRequirement: + coresMin: 2 + ramMin: 5000 + tmpdirMin: 10000 +inputs: + sample: string + vcfs: + type: File[] + secondaryFiles: [.tbi] +outputs: + vcf: + type: File + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [bcftools, concat] +arguments: + - $(inputs.vcfs) + - "-Oz" + - prefix: "-o" + valueFrom: $(inputs.sample)_rawimputed.vcf.gz + - shellQuote: false + valueFrom: "&&" + - "tabix" + - $(inputs.sample)_rawimputed.vcf.gz diff --git a/cwl/imputation/beagle.cwl b/cwl/imputation/beagle.cwl new file mode 100644 index 0000000000..4f99f4089e --- /dev/null +++ b/cwl/imputation/beagle.cwl @@ -0,0 +1,65 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: beagle5.4 + ResourceRequirement: + coresMin: 2 + ramMin: 10000 +inputs: + sample: string + chr: string + ref: File + map: File + vcf: + type: File + secondaryFiles: [.tbi] +outputs: + rawimputedvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [bcftools, view] +arguments: + - $(inputs.vcf) + - prefix: "--regions" + valueFrom: $(inputs.chr) + - "-Oz" + - prefix: "-o" + valueFrom: $(inputs.sample)_$(inputs.chr).vcf.gz + - shellQuote: false + valueFrom: "&&" + - "java" + - -Xms$(runtime.ram)m + - prefix: "-jar" + valueFrom: "/beagle.05May22.33a.jar" + - prefix: "ref=" + separate: false + valueFrom: $(inputs.ref) + - prefix: "map=" + separate: false + valueFrom: $(inputs.map) + - prefix: "gt=" + separate: false + valueFrom: $(inputs.sample)_$(inputs.chr).vcf.gz + - prefix: "out=" + separate: false + valueFrom: $(inputs.sample)_rawimputed_$(inputs.chr) + - prefix: "nthreads=" + separate: 
false + valueFrom: $(runtime.cores) + - shellQuote: false + valueFrom: "&&" + - "tabix" + - $(inputs.sample)_rawimputed_$(inputs.chr).vcf.gz + - shellQuote: false + valueFrom: "&&" + - "rm" + - $(inputs.sample)_$(inputs.chr).vcf.gz diff --git a/cwl/imputation/bedtools-intersect.cwl b/cwl/imputation/bedtools-intersect.cwl new file mode 100644 index 0000000000..a3d230c030 --- /dev/null +++ b/cwl/imputation/bedtools-intersect.cwl @@ -0,0 +1,24 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + a: File + b: File +outputs: + intersectbed: stdout +baseCommand: [bedtools, intersect] +arguments: + - prefix: "-a" + valueFrom: $(inputs.a) + - prefix: "-b" + valueFrom: $(inputs.b) +stdout: $(inputs.sample)_intersect.bed diff --git a/cwl/imputation/bedtools-subtract.cwl b/cwl/imputation/bedtools-subtract.cwl new file mode 100644 index 0000000000..e8feb4f9b8 --- /dev/null +++ b/cwl/imputation/bedtools-subtract.cwl @@ -0,0 +1,24 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + a: File + b: File +outputs: + subtractbed: stdout +baseCommand: [bedtools, subtract] +arguments: + - prefix: "-a" + valueFrom: $(inputs.a) + - prefix: "-b" + valueFrom: $(inputs.b) +stdout: $(inputs.sample)_subtract.bed diff --git a/cwl/imputation/get-imputedvcf.cwl b/cwl/imputation/get-imputedvcf.cwl new file mode 100644 index 0000000000..fec7beac1c --- /dev/null +++ b/cwl/imputation/get-imputedvcf.cwl @@ -0,0 +1,45 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + vcf: File +outputs: + imputedvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: zcat +arguments: + - $(inputs.vcf) + - shellQuote: false + valueFrom: "|" + - "egrep" + - "^#|IMP" + - shellQuote: false + valueFrom: "|" + - "egrep" + - prefix: "-v" + valueFrom: '0\|0' + - shellQuote: false + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: false + valueFrom: ">" + - $(inputs.sample).vcf.gz + - shellQuote: false + valueFrom: "&&" + - "tabix" + - $(inputs.sample).vcf.gz diff --git a/cwl/imputation/get-phasedvcf.cwl b/cwl/imputation/get-phasedvcf.cwl new file mode 100644 index 0000000000..f78b381d2b --- /dev/null +++ b/cwl/imputation/get-phasedvcf.cwl @@ -0,0 +1,41 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + vcf: File +outputs: + phasedvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: zcat +arguments: + - $(inputs.vcf) + - shellQuote: false + valueFrom: "|" + - "egrep" + - prefix: "-v" + valueFrom: '0\|0|IMP' + - shellQuote: false + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: false + valueFrom: ">" + - $(inputs.sample).vcf.gz + - shellQuote: false + valueFrom: "&&" + - "tabix" + - $(inputs.sample).vcf.gz diff --git a/cwl/imputation/imputation-wf.cwl b/cwl/imputation/imputation-wf.cwl new file mode 100644 index 0000000000..6a6782df2d --- /dev/null +++ b/cwl/imputation/imputation-wf.cwl @@ -0,0 +1,58 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +requirements: + SubworkflowFeatureRequirement: {} +inputs: + sample: string + chrs: + type: string[] + refsdir: Directory + mapsdir: Directory + vcf: + type: File + secondaryFiles: [.tbi] + nocallbed: File + panelnocallbed: File + panelcallbed: File + genomebed: File + +outputs: + phasedimputedvcf: + type: File + outputSource: merge-phased-imputed-wf/phasedimputedvcf + phasedimputednocallbed: + type: File + outputSource: merge-phased-imputed-wf/phasedimputednocallbed + +steps: + rtg-vcffilter: + run: rtg-vcffilter.cwl + in: + sample: sample + vcf: vcf + excludebed: nocallbed + out: [filteredvcf] + scatter-beagle-wf: + run: scatter-beagle-wf.cwl + in: + sample: sample + chrs: chrs + refsdir: refsdir + mapsdir: mapsdir + vcf: rtg-vcffilter/filteredvcf + out: [rawimputedvcf] + merge-phased-imputed-wf: + run: merge-phased-imputed-wf.cwl + in: + sample: sample + vcf: rtg-vcffilter/filteredvcf + nocallbed: nocallbed + rawimputedvcf: scatter-beagle-wf/rawimputedvcf + panelnocallbed: panelnocallbed + panelcallbed: panelcallbed + genomebed: genomebed + out: [phasedimputedvcf, phasedimputednocallbed] diff --git a/cwl/imputation/match-ref-map-chr.cwl b/cwl/imputation/match-ref-map-chr.cwl new file mode 100644 index 0000000000..2df9bc7864 --- /dev/null +++ b/cwl/imputation/match-ref-map-chr.cwl @@ -0,0 +1,42 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +hints: + LoadListingRequirement: + loadListing: shallow_listing +inputs: + chrs: string[] + refsdir: Directory + mapsdir: Directory +outputs: + refs: + type: File[] + maps: + type: File[] +expression: | + ${ + var refs = []; + var maps = []; + + for (var i = 0; i < inputs.chrs.length; i++) { + for (var j = 0; j < inputs.refsdir.listing.length; j++) { + var file = inputs.refsdir.listing[j]; + if (file.nameext == ".bref3" && file.basename.indexOf(inputs.chrs[i]+".") != -1) { + refs.push(file); + } + } + for (var j = 0; j < inputs.mapsdir.listing.length; j++) { + var file = inputs.mapsdir.listing[j]; + if (file.nameext == ".map" && file.basename.indexOf(inputs.chrs[i]+".") != -1) { + maps.push(file); + } + } + } + + return {"refs": refs, "maps": maps}; + } diff --git a/cwl/imputation/merge-phased-imputed-wf.cwl b/cwl/imputation/merge-phased-imputed-wf.cwl new file mode 100644 index 0000000000..5b75f21a1f --- /dev/null +++ b/cwl/imputation/merge-phased-imputed-wf.cwl @@ -0,0 +1,69 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +inputs: + sample: string + vcf: + type: File + secondaryFiles: [.tbi] + nocallbed: File + rawimputedvcf: + type: File + secondaryFiles: [.tbi] + panelnocallbed: File + panelcallbed: File + genomebed: File + +outputs: + phasedimputedvcf: + type: File + outputSource: rtg-vcfmerge/phasedimputedvcf + phasedimputednocallbed: + type: File + outputSource: bedtools-intersect_phasedimputednocallbed/intersectbed + +steps: + get-phasedvcf: + run: get-phasedvcf.cwl + in: + sample: sample + vcf: rawimputedvcf + out: [phasedvcf] + get-imputedvcf: + run: get-imputedvcf.cwl + in: + sample: sample + vcf: rawimputedvcf + out: [imputedvcf] + bedtools-intersect_phasedimputednocallbed: + run: bedtools-intersect.cwl + in: + sample: sample + a: nocallbed + b: panelnocallbed + out: [intersectbed] + bedtools-intersect_imputationbed: + run: bedtools-intersect.cwl + in: + sample: sample + a: nocallbed + b: panelcallbed + out: [intersectbed] + rtg-vcffilter-bedtools-intersect: + run: rtg-vcffilter-bedtools-intersect.cwl + in: + sample: sample + vcf: get-imputedvcf/imputedvcf + bed: bedtools-intersect_imputationbed/intersectbed + out: [filteredvcf] + rtg-vcfmerge: + run: rtg-vcfmerge.cwl + in: + sample: sample + vcf: vcf + phasedvcf: get-phasedvcf/phasedvcf + imputedvcf: rtg-vcffilter-bedtools-intersect/filteredvcf + out: [phasedimputedvcf] diff --git a/cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl b/cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl new file mode 100644 index 0000000000..dfacd09a76 --- /dev/null +++ b/cwl/imputation/rtg-vcffilter-bedtools-intersect.cwl @@ -0,0 +1,54 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + vcf: + type: File + secondaryFiles: [.tbi] + bed: File +outputs: + filteredvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [rtg, vcffilter] +arguments: + - "--remove-overlapping" + - prefix: "-i" + valueFrom: $(inputs.vcf) + - prefix: "-o" + valueFrom: "-" + - shellQuote: false + valueFrom: "|" + - "bedtools" + - "intersect" + - "-header" + - prefix: "-f" + valueFrom: "1" + - prefix: "-a" + valueFrom: "stdin" + - prefix: "-b" + valueFrom: $(inputs.bed) + - shellQuote: false + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: false + valueFrom: ">" + - $(inputs.sample).vcf.gz + - shellQuote: false + valueFrom: "&&" + - "tabix" + - $(inputs.sample).vcf.gz diff --git a/cwl/imputation/rtg-vcffilter.cwl b/cwl/imputation/rtg-vcffilter.cwl new file mode 100644 index 0000000000..e59a0c2fc5 --- /dev/null +++ b/cwl/imputation/rtg-vcffilter.cwl @@ -0,0 +1,31 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + vcf: + type: File + secondaryFiles: [.tbi] + excludebed: File +outputs: + filteredvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [rtg, vcffilter] +arguments: + - prefix: "-i" + valueFrom: $(inputs.vcf) + - prefix: "-o" + valueFrom: $(inputs.sample).vcf.gz + - prefix: "--exclude-bed" + valueFrom: $(inputs.excludebed) diff --git a/cwl/imputation/rtg-vcfmerge.cwl b/cwl/imputation/rtg-vcfmerge.cwl new file mode 100644 index 0000000000..ef95d1f74e --- /dev/null +++ b/cwl/imputation/rtg-vcfmerge.cwl @@ -0,0 +1,36 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 5000 +inputs: + sample: string + vcf: + type: File + secondaryFiles: [.tbi] + phasedvcf: + type: File + secondaryFiles: [.tbi] + imputedvcf: + type: File + secondaryFiles: [.tbi] +outputs: + phasedimputedvcf: + type: File + outputBinding: + glob: "*.vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [rtg, vcfmerge] +arguments: + - "--force-merge-all" + - $(inputs.phasedvcf) + - $(inputs.vcf) + - $(inputs.imputedvcf) + - prefix: "-o" + valueFrom: $(inputs.sample)_phased_imputed.vcf.gz diff --git a/cwl/imputation/scatter-beagle-wf.cwl b/cwl/imputation/scatter-beagle-wf.cwl new file mode 100644 index 0000000000..1f80c00db4 --- /dev/null +++ b/cwl/imputation/scatter-beagle-wf.cwl @@ -0,0 +1,48 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.1 +class: Workflow +requirements: + ScatterFeatureRequirement: {} +inputs: + sample: string + chrs: + type: string[] + refsdir: Directory + mapsdir: Directory + vcf: + type: File + secondaryFiles: [.tbi] + +outputs: + rawimputedvcf: + type: File + outputSource: bcftools-concat/vcf + +steps: + match-ref-map-chr: + run: match-ref-map-chr.cwl + in: + chrs: chrs + refsdir: refsdir + mapsdir: mapsdir + out: [refs, maps] + beagle: + scatter: [chr, ref, map] + scatterMethod: dotproduct + run: beagle.cwl + in: + sample: sample + chr: chrs + ref: match-ref-map-chr/refs + map: match-ref-map-chr/maps + vcf: vcf + out: [rawimputedvcf] + bcftools-concat: + run: bcftools-concat.cwl + in: + sample: sample + vcfs: beagle/rawimputedvcf + out: [vcf] diff --git a/cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..f6562eef58 --- /dev/null +++ b/cwl/imputation/yml/imputation-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,27 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +sample: "A-PRHS-PR000971-BL-COL-47620BL1" +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +vcf: + class: File + location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz +nocallbed: + class: File + location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed +panelcallbed: + class: File + location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed diff --git a/cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..f7c519906d --- /dev/null +++ b/cwl/imputation/yml/rtg-vcffilter-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,11 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sample: "A-PRHS-PR000971-BL-COL-47620BL1" +vcf: + class: File + location: keep:d2db452933c90d6d116107cc687660fb+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz +excludebed: + class: File + location: keep:d2db452933c90d6d116107cc687660fb+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed diff --git a/cwl/imputation/yml/rtg-vcffilter-test.yml b/cwl/imputation/yml/rtg-vcffilter-test.yml new file mode 100644 index 0000000000..ced2ea6b7f --- /dev/null +++ b/cwl/imputation/yml/rtg-vcffilter-test.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. 
All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +sample: "A-PRHS-PR000971-BL-COL-47620BL1" +vcf: + class: File + location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_varonly.vcf.gz +includebed: + class: File + location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed +excludebed: + class: File + location: keep:4bb2da1e66635abad432315cb5c13021+553/A-PRHS-PR000971-BL-COL-47620BL1_nocall.bed + diff --git a/cwl/lightning/batch-dirs.cwl b/cwl/lightning/batch-dirs.cwl new file mode 100644 index 0000000000..d1caa68fd5 --- /dev/null +++ b/cwl/lightning/batch-dirs.cwl @@ -0,0 +1,31 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +inputs: + dirs: + type: + type: array + items: Directory + batchsize: + type: int +outputs: + batches: + type: + type: array + items: + type: array + items: Directory +expression: | + ${ + var batches = []; + for (var i = 0; i < inputs.dirs.length; i+=inputs.batchsize) { + var batch = inputs.dirs.slice(i, i+inputs.batchsize); + batches.push(batch); + } + return {"batches": batches}; + } diff --git a/cwl/lightning/fasta2numpy-multi-wf.cwl b/cwl/lightning/fasta2numpy-multi-wf.cwl new file mode 100644 index 0000000000..f5ce6a9d3f --- /dev/null +++ b/cwl/lightning/fasta2numpy-multi-wf.cwl @@ -0,0 +1,138 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.2 +class: Workflow +requirements: + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + StepInputExpressionRequirement: {} +#hints: +# arv:IntermediateOutput: +# outputTTL: 604800 + +inputs: + tagset: + type: File + fastadirs: + type: + type: array + items: Directory + refdirs: + type: + type: array + items: Directory + batchsize: + type: int + matchgenome_array: + type: string[] + regions_nestedarray: + type: + type: array + items: + type: array + items: [File, "null"] + threads_array: + type: int[] + mergeoutput_array: + type: string[] + expandregions_array: + type: int[] + phenotypesdir: + type: Directory + chrs: string[] + snpeffdatadir: Directory + genomeversion_array: string[] + dbsnp: + type: File + secondaryFiles: [.csi] + gnomaddir: Directory + readmeinfo: string[] + +outputs: + stagednpydir: + type: Directory[] + outputSource: lightning-slice-numpy-anno2vcf-wf/stagednpydir + stagedonehotnpydir: + type: Directory[] + outputSource: lightning-slice-numpy-anno2vcf-wf/stagedonehotnpydir + stagedannotationdir: + type: + type: array + items: [Directory, "null"] + outputSource: lightning-slice-numpy-anno2vcf-wf/stagedannotationdir + readme: + type: File[] + outputSource: lightning-slice-numpy-anno2vcf-wf/readme + +steps: + batch-dirs: + run: batch-dirs.cwl + in: + dirs: fastadirs + batchsize: batchsize + out: [batches] + + lightning-import_data: + run: lightning-import.cwl + scatter: fastadirs + in: + saveincomplete: + valueFrom: "false" + tagset: tagset + fastadirs: batch-dirs/batches + out: [lib] + + lightning-import_refs: + run: lightning-import.cwl + scatter: fastadirs + in: + saveincomplete: + valueFrom: "true" + tagset: tagset + fastadirs: refdirs + out: [lib] + + lightning-slice: + run: lightning-slice.cwl + scatter: reflib + in: + datalibs: lightning-import_data/lib + reflib: lightning-import_refs/lib + out: [libdir] + + make-arrays: + 
run: make-arrays.cwl + in: + matchgenome_array: matchgenome_array + libdir_array: lightning-slice/libdir + genomeversion_array: genomeversion_array + regions_nestedarray: regions_nestedarray + threads_array: threads_array + mergeoutput_array: mergeoutput_array + expandregions_array: expandregions_array + out: [full_matchgenome_array, full_libdir_array, full_genomeversion_array, full_regions_array, full_threads_array, full_mergeoutput_array, full_expandregions_array, full_libname_array] + + lightning-slice-numpy-anno2vcf-wf: + run: lightning-slice-numpy-anno2vcf-wf.cwl + scatter: [matchgenome, libdir, genomeversion, regions, threads, mergeoutput, expandregions, libname] + scatterMethod: dotproduct + in: + matchgenome: make-arrays/full_matchgenome_array + libdir: make-arrays/full_libdir_array + regions: make-arrays/full_regions_array + threads: make-arrays/full_threads_array + mergeoutput: make-arrays/full_mergeoutput_array + expandregions: make-arrays/full_expandregions_array + phenotypesdir: phenotypesdir + libname: make-arrays/full_libname_array + chrs: chrs + snpeffdatadir: snpeffdatadir + genomeversion: make-arrays/full_genomeversion_array + dbsnp: dbsnp + gnomaddir: gnomaddir + readmeinfo: readmeinfo + out: [stagednpydir, stagedonehotnpydir, stagedannotationdir, readme] diff --git a/cwl/lightning/fasta2numpy-wf.cwl b/cwl/lightning/fasta2numpy-wf.cwl new file mode 100644 index 0000000000..a804b9cfac --- /dev/null +++ b/cwl/lightning/fasta2numpy-wf.cwl @@ -0,0 +1,226 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.2 +class: Workflow +requirements: + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + StepInputExpressionRequirement: {} + MultipleInputFeatureRequirement: {} + +inputs: + tagset: + type: File + fastadirs: + type: + type: array + items: Directory + refdir: + type: Directory + batchsize: + type: int + regions: + type: File? + matchgenome: + type: string + threads: + type: int + mergeoutput: + type: string + expandregions: + type: int + phenotypesnofamilydir: + type: Directory + phenotypesdir: + type: Directory + trainingsetsize: + type: float + randomseed: + type: int + pcacomponents: + type: int + chrs: string[] + snpeffdatadir: Directory + genomeversion: string + dbsnp: + type: File + secondaryFiles: [.csi] + gnomaddir: Directory + readmeinfo: string[] + +outputs: + stagednpydir: + type: Directory + outputSource: stage-output/stagednpydir + stagedonehotnpydir: + type: Directory + outputSource: stage-output/stagedonehotnpydir + stagedannotationdir: + type: Directory + outputSource: stage-output/stagedannotationdir + readme: + type: File + outputSource: genreadme/readme + +steps: + batch-dirs: + run: batch-dirs.cwl + in: + dirs: fastadirs + batchsize: batchsize + out: [batches] + + lightning-import_data: + run: lightning-import.cwl + scatter: fastadirs + in: + saveincomplete: + valueFrom: "false" + tagset: tagset + fastadirs: batch-dirs/batches + out: [lib] + + lightning-import_refs: + run: lightning-import.cwl + in: + saveincomplete: + valueFrom: "true" + tagset: tagset + fastadirs: refdir + out: [lib] + + lightning-slice: + run: lightning-slice.cwl + in: + datalibs: lightning-import_data/lib + reflib: lightning-import_refs/lib + out: [libdir] + + lightning-tiling-stats: + run: lightning-tiling-stats.cwl + in: + libdir: lightning-slice/libdir + out: [bed] + + lightning-choose-samples: + run: lightning-choose-samples.cwl + in: + matchgenome: 
matchgenome + libdir: lightning-slice/libdir + phenotypesdir: phenotypesnofamilydir + trainingsetsize: trainingsetsize + randomseed: randomseed + out: [samplescsv] + + lightning-slice-numpy: + run: lightning-slice-numpy.cwl + in: + matchgenome: matchgenome + libdir: lightning-slice/libdir + regions: regions + threads: threads + mergeoutput: mergeoutput + expandregions: expandregions + samplescsv: lightning-choose-samples/samplescsv + out: [outdir, npys, chunktagoffsetcsv] + + lightning-slice-numpy-onehot: + run: lightning-slice-numpy-onehot.cwl + in: + matchgenome: matchgenome + libdir: lightning-slice/libdir + regions: regions + threads: threads + mergeoutput: mergeoutput + expandregions: expandregions + samplescsv: lightning-choose-samples/samplescsv + out: [outdir, npys] + + lightning-slice-numpy-pca: + run: lightning-slice-numpy-pca.cwl + in: + matchgenome: matchgenome + libdir: lightning-slice/libdir + regions: regions + threads: threads + mergeoutput: mergeoutput + expandregions: expandregions + samplescsv: lightning-choose-samples/samplescsv + pcacomponents: pcacomponents + out: [outdir, pcanpy, pcasamplescsv] + + lightning-plot_1-2: + run: lightning-plot.cwl + in: + pcanpy: lightning-slice-numpy-pca/pcanpy + pcasamplescsv: lightning-slice-numpy-pca/pcasamplescsv + phenotypesdir: phenotypesdir + xcomponent: + valueFrom: "1" + ycomponent: + valueFrom: "2" + out: [png] + + lightning-plot_2-3: + run: lightning-plot.cwl + in: + pcanpy: lightning-slice-numpy-pca/pcanpy + pcasamplescsv: lightning-slice-numpy-pca/pcasamplescsv + phenotypesdir: phenotypesdir + xcomponent: + valueFrom: "2" + ycomponent: + valueFrom: "3" + out: [png] + + lightning-anno2vcf-onehot: + run: lightning-anno2vcf.cwl + in: + annodir: lightning-slice-numpy-onehot/outdir + out: [vcfdir] + + make-libname: + run: make-libname.cwl + in: + matchgenome: matchgenome + genomeversion: genomeversion + out: [libname] + + annotate-wf: + run: ../annotation/annotate-wf.cwl + in: + sample: 
make-libname/libname + chrs: chrs + vcfdir: lightning-anno2vcf-onehot/vcfdir + snpeffdatadir: snpeffdatadir + genomeversion: genomeversion + dbsnp: dbsnp + gnomaddir: gnomaddir + out: [annotatedvcf, summary] + + stage-output: + run: stage-output.cwl + in: + libname: make-libname/libname + npyfiles: + source: [lightning-slice-numpy/npys, lightning-slice-numpy/chunktagoffsetcsv] + linkMerge: merge_flattened + onehotnpyfiles: lightning-slice-numpy-onehot/npys + pcapngs: + source: [lightning-plot_1-2/png, lightning-plot_2-3/png] + linkMerge: merge_flattened + bed: lightning-tiling-stats/bed + annotatedvcf: annotate-wf/annotatedvcf + summary: annotate-wf/summary + out: [stagednpydir, stagedonehotnpydir, stagedannotationdir] + + genreadme: + run: genreadme.cwl + in: + samplescsv: lightning-choose-samples/samplescsv + readmeinfo: readmeinfo + out: [readme] diff --git a/cwl/lightning/genreadme.cwl b/cwl/lightning/genreadme.cwl new file mode 100644 index 0000000000..ec25d41a23 --- /dev/null +++ b/cwl/lightning/genreadme.cwl @@ -0,0 +1,27 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 2000 +inputs: + samplescsv: File + readmeinfo: string[] + pythonscript: + type: File + default: + class: File + location: src/genreadme.py +outputs: + readme: + type: stdout +arguments: + - $(inputs.pythonscript) + - $(inputs.samplescsv) + - $(inputs.readmeinfo) +stdout: README diff --git a/cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl b/cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl new file mode 100644 index 0000000000..8e1279853d --- /dev/null +++ b/cwl/lightning/imputation-gvcf2fasta-fasta2numpy-wf.cwl @@ -0,0 +1,119 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.2 +class: Workflow +requirements: + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + StepInputExpressionRequirement: {} + +inputs: + sampleids: + type: string[] + splitvcfdirs: + type: Directory[] + gqcutoff: + type: int + genomebed: + type: File + ref: + type: File + chrs: string[] + refsdir: Directory + mapsdir: Directory + panelnocallbed: File + panelcallbed: File + tagset: + type: File + refdir: + type: Directory + batchsize: + type: int + regions: + type: File? + matchgenome: + type: string + threads: + type: int + mergeoutput: + type: string + expandregions: + type: int + +outputs: [] + +steps: + scatter-gvcf2fasta_splitvcf-imputation-wf: + run: ../gvcf2fasta/scatter-gvcf2fasta_splitvcf-imputation-wf.cwl + in: + sampleids: sampleids + splitvcfdirs: splitvcfdirs + gqcutoff: gqcutoff + genomebed: genomebed + ref: ref + chrs: chrs + refsdir: refsdir + mapsdir: mapsdir + panelnocallbed: panelnocallbed + panelcallbed: panelcallbed + out: [fas] + + make-fastadirs: + run: make-fastadirs.cwl + in: + fas: scatter-gvcf2fasta_splitvcf-imputation-wf/fas + out: [fastadirs] + + batch-dirs: + run: batch-dirs.cwl + in: + dirs: make-fastadirs/fastadirs + batchsize: batchsize + out: [batches] + + lightning-import_data: + run: lightning-import.cwl + scatter: fastadirs + in: + saveincomplete: + valueFrom: "false" + tagset: tagset + fastadirs: batch-dirs/batches + out: [lib] + + lightning-import_refs: + run: lightning-import.cwl + in: + saveincomplete: + valueFrom: "true" + tagset: tagset + fastadirs: refdir + out: [lib] + + lightning-slice: + run: lightning-slice.cwl + in: + datalibs: lightning-import_data/lib + reflib: lightning-import_refs/lib + out: [libdir] + + lightning-tiling-stats: + run: lightning-tiling-stats.cwl + in: + libdir: lightning-slice/libdir + out: [bed] + + lightning-slice-numpy: + run: lightning-slice-numpy.cwl + in: + matchgenome: 
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Workflow: import batches of FASTA directories and the reference directories
# into Lightning libraries, slice the data libraries against each reference
# library, then run lightning-slice-numpy-anno2vcf-wf once per parameter
# combination produced by make-arrays.
#
# How the parallel input arrays line up (per make-arrays.cwl):
#   - refdirs, genomeversion_array and the outer level of regions_nestedarray
#     are indexed together (one entry per reference / sliced library).
#   - threads_array, mergeoutput_array and expandregions_array are indexed by
#     the position inside each inner regions list.
#   - matchgenome_array is crossed with all of the above.

$namespaces:
  arv: "http://arvados.org/cwl#"
cwlVersion: v1.2
class: Workflow
requirements:
  ScatterFeatureRequirement: {}
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
#hints:
#  arv:IntermediateOutput:
#    outputTTL: 604800

inputs:
  tagset:
    type: File
  fastadirs:
    type:
      type: array
      items: Directory
  refdirs:
    type:
      type: array
      items: Directory
  batchsize:
    type: int
  matchgenome_array:
    type: string[]
  # One inner list per reference; a null entry means "no regions file".
  regions_nestedarray:
    type:
      type: array
      items:
        type: array
        items: [File, "null"]
  threads_array:
    type: int[]
  mergeoutput_array:
    type: string[]
  expandregions_array:
    type: int[]
  phenotypesdir:
    type: Directory
  chrs: string[]
  snpeffdatadir: Directory
  genomeversion_array: string[]
  dbsnp:
    type: File
    secondaryFiles: [.csi]
  gnomaddir: Directory
  readmeinfo: string[]

# Every output is an array with one element per scattered sub-workflow run.
outputs:
  stagednpydir:
    type: Directory[]
    outputSource: lightning-slice-numpy-anno2vcf-wf/stagednpydir
  stagedonehotnpydir:
    type: Directory[]
    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedonehotnpydir
  # The sub-workflow declares this output as optional, hence the nullable items.
  stagedannotationdir:
    type:
      type: array
      items: [Directory, "null"]
    outputSource: lightning-slice-numpy-anno2vcf-wf/stagedannotationdir
  readme:
    type: File[]
    outputSource: lightning-slice-numpy-anno2vcf-wf/readme

steps:
  # Group the FASTA directories into batches of `batchsize`.
  batch-dirs:
    run: batch-dirs.cwl
    in:
      dirs: fastadirs
      batchsize: batchsize
    out: [batches]

  # One import per batch; incomplete tiles are not saved for sample data.
  lightning-import_data:
    run: lightning-import.cwl
    scatter: fastadirs
    in:
      saveincomplete:
        valueFrom: "false"
      tagset: tagset
      fastadirs: batch-dirs/batches
    out: [lib]

  # One import per reference directory; incomplete tiles are saved here.
  lightning-import_refs:
    run: lightning-import.cwl
    scatter: fastadirs
    in:
      saveincomplete:
        valueFrom: "true"
      tagset: tagset
      fastadirs: refdirs
    out: [lib]

  # Slice all data libraries against each reference library in turn.
  lightning-slice:
    run: lightning-slice.cwl
    scatter: reflib
    in:
      datalibs: lightning-import_data/lib
      reflib: lightning-import_refs/lib
    out: [libdir]

  # Expand the per-reference / per-region parameters into flat, equal-length
  # arrays suitable for a dotproduct scatter.
  make-arrays:
    run: make-arrays.cwl
    in:
      matchgenome_array: matchgenome_array
      libdir_array: lightning-slice/libdir
      genomeversion_array: genomeversion_array
      regions_nestedarray: regions_nestedarray
      threads_array: threads_array
      mergeoutput_array: mergeoutput_array
      expandregions_array: expandregions_array
    out: [full_matchgenome_array, full_libdir_array, full_genomeversion_array, full_regions_array, full_threads_array, full_mergeoutput_array, full_expandregions_array, full_libname_array]

  # Run the numpy/onehot/annotation sub-workflow once per array position.
  lightning-slice-numpy-anno2vcf-wf:
    run: lightning-slice-numpy-anno2vcf-wf.cwl
    scatter: [matchgenome, libdir, genomeversion, regions, threads, mergeoutput, expandregions, libname]
    scatterMethod: dotproduct
    in:
      matchgenome: make-arrays/full_matchgenome_array
      libdir: make-arrays/full_libdir_array
      regions: make-arrays/full_regions_array
      threads: make-arrays/full_threads_array
      mergeoutput: make-arrays/full_mergeoutput_array
      expandregions: make-arrays/full_expandregions_array
      phenotypesdir: phenotypesdir
      libname: make-arrays/full_libname_array
      chrs: chrs
      snpeffdatadir: snpeffdatadir
      genomeversion: make-arrays/full_genomeversion_array
      dbsnp: dbsnp
      gnomaddir: gnomaddir
      readmeinfo: readmeinfo
    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir, readme]
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `lightning anno2vcf` on an annotation directory and returns the whole
# job output directory as `vcfdir`.
cwlVersion: v1.2
class: CommandLineTool
$namespaces: {arv: "http://arvados.org/cwl#"}

baseCommand:
  - lightning
  - anno2vcf
arguments:
  - "-local=true"
  # prefix and value are glued together (separate: false) to form -flag=value
  - {prefix: "-input-dir=", valueFrom: "$(inputs.annodir)", separate: false}
  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}

inputs:
  annodir:
    type: Directory
outputs:
  vcfdir:
    type: Directory
    outputBinding: {glob: "."}

requirements:
  NetworkAccess: {networkAccess: true}
hints:
  DockerRequirement: {dockerPull: lightning}
  ResourceRequirement: {coresMin: 64, ramMin: 500000}
  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `lightning import` on one FASTA directory (or an array of them) with
# the given tag set, writing library.gob.gz and stats.json.
# `saveincomplete` is passed verbatim as the value of -save-incomplete-tiles=.
cwlVersion: v1.2
class: CommandLineTool
$namespaces: {arv: "http://arvados.org/cwl#"}

baseCommand:
  - lightning
  - import
arguments:
  - "-local=true"
  - "-loglevel=info"
  - "-skip-ooo=true"
  - "-output-tiles=true"
  - "-batches=1"
  - "-batch=0"
  - {prefix: "-save-incomplete-tiles=", valueFrom: "$(inputs.saveincomplete)", separate: false}
  # The remaining flags are emitted as two separate words: flag, then value.
  - {prefix: "-match-chromosome", valueFrom: "^(chr)?([0-9]+|X|Y|M)$"}
  - {prefix: "-output-stats", valueFrom: "stats.json"}
  - {prefix: "-tag-library", valueFrom: "$(inputs.tagset)"}
  - {prefix: "-o", valueFrom: "library.gob.gz"}
  - "$(inputs.fastadirs)"

inputs:
  saveincomplete: string
  tagset: File
  fastadirs:
    type:
      - Directory
      - {type: array, items: Directory}
outputs:
  lib:
    type: File
    outputBinding: {glob: "*gob.gz"}

requirements:
  NetworkAccess: {networkAccess: true}
hints:
  DockerRequirement:
    dockerPull: lightning
    arv:dockerCollectionPDH: 1f430e6dd9b6be0ae78d4cffde9b1fef+892
  ResourceRequirement: {coresMin: 96, ramMin: 670000}
  arv:RuntimeConstraints: {keep_cache: 6200, outputDirType: keep_output_dir}
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Workflow: for one sliced library, produce the full numpy matrices, the
# filtered one-hot matrices, an annotated VCF of the one-hot variants, a tile
# BED file and a README, then stage them into three output directories.

cwlVersion: v1.2
class: Workflow
requirements:
  InlineJavascriptRequirement: {}
  SubworkflowFeatureRequirement: {}
  MultipleInputFeatureRequirement: {}

inputs:
  matchgenome: string
  libdir: Directory
  regions: File?
  threads: int
  mergeoutput: string
  expandregions: int
  phenotypesdir: Directory
  libname: string
  chrs: string[]
  snpeffdatadir: Directory
  genomeversion: string
  dbsnp:
    type: File
    secondaryFiles: [.csi]
  gnomaddir: Directory
  readmeinfo: string[]

outputs:
  stagednpydir:
    type: Directory
    outputSource: stage-output/stagednpydir
  stagedonehotnpydir:
    type: Directory
    outputSource: stage-output/stagedonehotnpydir
  stagedannotationdir:
    type: Directory?
    outputSource: stage-output/stagedannotationdir
  readme:
    type: File
    outputSource: genreadme/readme

steps:
  # Tile-position BED file; intended to run only when no regions file is given.
  # NOTE(review): `when` reads inputs.regions, but `regions` is not listed in
  # this step's `in`, and a `when` expression only sees the step's own inputs.
  # As written the condition looks like it is always true - confirm, and if
  # `regions: regions` is added, check that stage-output's `bed` input can
  # accept null.
  lightning-tiling-stats:
    run: lightning-tiling-stats.cwl
    when: $(inputs.regions == null)
    in:
      libdir: libdir
    out: [bed]

  # Full tile-variant matrices.
  # NOTE(review): `samplescsv` is requested here, but lightning-slice-numpy.cwl
  # as added in this same patch declares only outdir, npys and
  # chunktagoffsetcsv - verify the tool and this step agree.
  lightning-slice-numpy:
    run: lightning-slice-numpy.cwl
    in:
      matchgenome: matchgenome
      libdir: libdir
      regions: regions
      threads: threads
      mergeoutput: mergeoutput
      expandregions: expandregions
    out: [outdir, npys, samplescsv, chunktagoffsetcsv]

  # Filtered one-hot matrices.
  # NOTE(review): lightning-slice-numpy-onehot.cwl in this patch takes a
  # required `samplescsv` input and has no `phenotypesdir` input or
  # `samplescsv` output - verify the tool and this step agree.
  lightning-slice-numpy-onehot:
    run: lightning-slice-numpy-onehot.cwl
    in:
      matchgenome: matchgenome
      libdir: libdir
      regions: regions
      threads: threads
      mergeoutput: mergeoutput
      expandregions: expandregions
      phenotypesdir: phenotypesdir
    out: [outdir, npys, samplescsv]

  # Convert the one-hot step's output directory to VCF.
  lightning-anno2vcf-onehot:
    run: lightning-anno2vcf.cwl
    in:
      annodir: lightning-slice-numpy-onehot/outdir
    out: [vcfdir]

  # Annotate the VCFs; the library name is passed as the sample name.
  annotate-wf:
    run: ../annotation/annotate-wf.cwl
    in:
      sample: libname
      chrs: chrs
      vcfdir: lightning-anno2vcf-onehot/vcfdir
      snpeffdatadir: snpeffdatadir
      genomeversion: genomeversion
      dbsnp: dbsnp
      gnomaddir: gnomaddir
    out: [annotatedvcf, summary]

  # Collect everything into the library_full / library_filtered / annotation
  # directories. merge_flattened turns the mixed File / File[] sources into a
  # single flat File[].
  # NOTE(review): stage-output.cwl in this patch also declares a `pcapngs`
  # input (File[]) that is not supplied here - confirm it is optional.
  stage-output:
    run: stage-output.cwl
    in:
      libname: libname
      npyfiles:
        source: [lightning-slice-numpy/npys, lightning-slice-numpy/samplescsv, lightning-slice-numpy/chunktagoffsetcsv]
        linkMerge: merge_flattened
      onehotnpyfiles:
        source: [lightning-slice-numpy-onehot/npys, lightning-slice-numpy-onehot/samplescsv]
        linkMerge: merge_flattened
      bed: lightning-tiling-stats/bed
      annotatedvcf: annotate-wf/annotatedvcf
      summary: annotate-wf/summary
    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir]

  # Render the README from the sample list and the release info strings.
  genreadme:
    run: genreadme.cwl
    in:
      samplescsv: lightning-slice-numpy/samplescsv
      readmeinfo: readmeinfo
    out: [readme]
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `lightning slice-numpy` in PCA mode (-pca=true) and collects pca.npy
# and samples.csv from the output directory.
cwlVersion: v1.2
class: CommandLineTool
$namespaces: {arv: "http://arvados.org/cwl#"}

baseCommand:
  - lightning
  - slice-numpy
arguments:
  - "-local=true"
  # Each entry below renders as a single -flag=value word (separate: false).
  - {prefix: "-input-dir=", valueFrom: "$(inputs.libdir)", separate: false}
  - {prefix: "-output-dir=", valueFrom: "$(runtime.outdir)", separate: false}
  - {prefix: "-match-genome=", valueFrom: "$(inputs.matchgenome)", separate: false}
  - {prefix: "-regions=", valueFrom: "$(inputs.regions)", separate: false}
  - {prefix: "-threads=", valueFrom: "$(inputs.threads)", separate: false}
  - {prefix: "-merge-output=", valueFrom: "$(inputs.mergeoutput)", separate: false}
  - {prefix: "-expand-regions=", valueFrom: "$(inputs.expandregions)", separate: false}
  - {prefix: "-samples=", valueFrom: "$(inputs.samplescsv)", separate: false}
  - "-pca=true"
  - {prefix: "-pca-components=", valueFrom: "$(inputs.pcacomponents)", separate: false}
  - "-min-coverage=0.98"
  - "-max-pca-tiles=100000"

inputs:
  matchgenome:
    type: string
  libdir:
    type: Directory
  regions:
    type: File?
  threads:
    type: int
  mergeoutput:
    type: string
  expandregions:
    type: int
  samplescsv:
    type: File
  pcacomponents:
    type: int
outputs:
  outdir:
    type: Directory
    outputBinding: {glob: "."}
  pcanpy:
    type: File
    outputBinding: {glob: "pca.npy"}
  pcasamplescsv:
    type: File
    outputBinding: {glob: "samples.csv"}

requirements:
  NetworkAccess: {networkAccess: true}
hints:
  DockerRequirement: {dockerPull: lightning}
  ResourceRequirement: {coresMin: 64, ramMin: 1500000}
  arv:RuntimeConstraints: {keep_cache: 83000, outputDirType: keep_output_dir}
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `lightning slice` over the data libraries followed by the reference
# library, and returns the job output directory as `libdir`.
cwlVersion: v1.2
class: CommandLineTool
$namespaces: {arv: "http://arvados.org/cwl#"}

baseCommand:
  - lightning
  - slice
arguments:
  - "-local=true"
  # Flag and value are passed as two separate words here.
  - {prefix: "-output-dir", valueFrom: "$(runtime.outdir)"}
  # Positional arguments: every data library first, the reference library last.
  - "$(inputs.datalibs)"
  - "$(inputs.reflib)"

inputs:
  datalibs:
    type: {type: array, items: File}
  reflib:
    type: File
outputs:
  libdir:
    type: Directory
    outputBinding: {glob: "."}

requirements:
  NetworkAccess: {networkAccess: true}
hints:
  DockerRequirement:
    dockerPull: lightning
    arv:dockerCollectionPDH: 1f430e6dd9b6be0ae78d4cffde9b1fef+892
  ResourceRequirement: {coresMin: 96, ramMin: 660000}
  arv:RuntimeConstraints: {keep_cache: 6200, outputDirType: keep_output_dir}
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Expand per-library / per-region parameters into flat, equal-length arrays
# (one element per run) suitable for a dotproduct scatter.
#
# Indexing contract:
#   - libdir_array, genomeversion_array and the outer level of
#     regions_nestedarray are indexed together (index j).
#   - threads_array, mergeoutput_array and expandregions_array are indexed by
#     the position k inside regions_nestedarray[j].
#   - matchgenome_array (index i) is crossed with every (j, k) pair.
# Each output element also gets a library name built as
#   genomeversion + matchgenome + "_library".
#
# The expression fails with an explicit message when one of the parallel
# arrays is too short for the indices above, instead of raising a bare
# TypeError or silently emitting undefined/null entries.
cwlVersion: v1.2
class: ExpressionTool
requirements:
  InlineJavascriptRequirement: {}
inputs:
  matchgenome_array: string[]
  libdir_array: Directory[]
  genomeversion_array: string[]
  regions_nestedarray:
    type:
      type: array
      items:
        type: array
        items: [File, "null"]
  threads_array: int[]
  mergeoutput_array: string[]
  expandregions_array: int[]
outputs:
  full_matchgenome_array: string[]
  full_libdir_array: Directory[]
  full_genomeversion_array: string[]
  full_regions_array:
    type:
      type: array
      items: [File, "null"]
  full_threads_array: int[]
  full_mergeoutput_array: string[]
  full_expandregions_array: int[]
  full_libname_array: string[]
expression: |
  ${
    // Throw a readable error when `arr` cannot be indexed up to `needed - 1`.
    function requireLength(name, arr, needed, driver) {
      if (arr.length < needed) {
        throw new Error("make-arrays: " + name + " has " + arr.length +
                        " entries but " + driver + " needs at least " + needed);
      }
    }

    var full_matchgenome_array = [];
    var full_libdir_array = [];
    var full_genomeversion_array = [];
    var full_regions_array = [];
    var full_threads_array = [];
    var full_mergeoutput_array = [];
    var full_expandregions_array = [];
    var full_libname_array = [];
    var nlibs = inputs.libdir_array.length;
    for (var i = 0; i < inputs.matchgenome_array.length; i++) {
      // Checked inside the loop so that an empty matchgenome_array still
      // yields empty outputs without inspecting the other arrays.
      requireLength("genomeversion_array", inputs.genomeversion_array, nlibs, "libdir_array");
      requireLength("regions_nestedarray", inputs.regions_nestedarray, nlibs, "libdir_array");
      for (var j = 0; j < nlibs; j++) {
        var regions = inputs.regions_nestedarray[j];
        var where = "regions_nestedarray[" + j + "]";
        requireLength("threads_array", inputs.threads_array, regions.length, where);
        requireLength("mergeoutput_array", inputs.mergeoutput_array, regions.length, where);
        requireLength("expandregions_array", inputs.expandregions_array, regions.length, where);
        for (var k = 0; k < regions.length; k++) {
          full_matchgenome_array.push(inputs.matchgenome_array[i]);
          full_libdir_array.push(inputs.libdir_array[j]);
          full_genomeversion_array.push(inputs.genomeversion_array[j]);
          full_regions_array.push(regions[k]);
          full_threads_array.push(inputs.threads_array[k]);
          full_mergeoutput_array.push(inputs.mergeoutput_array[k]);
          full_expandregions_array.push(inputs.expandregions_array[k]);
          var libname = inputs.genomeversion_array[j]+inputs.matchgenome_array[i]+"_library";
          full_libname_array.push(libname);
        }
      }
    }
    return {"full_matchgenome_array": full_matchgenome_array,
            "full_libdir_array": full_libdir_array, "full_genomeversion_array": full_genomeversion_array,
            "full_regions_array": full_regions_array, "full_threads_array": full_threads_array,
            "full_mergeoutput_array": full_mergeoutput_array, "full_expandregions_array": full_expandregions_array,
            "full_libname_array": full_libname_array};
  }
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +inputs: + matchgenome: string + genomeversion: string +outputs: + libname: string +expression: | + ${ + var libname = inputs.genomeversion+inputs.matchgenome+"_library"; + return {"libname": libname}; + } diff --git a/cwl/lightning/readme.md b/cwl/lightning/readme.md new file mode 100644 index 0000000000..03418646a9 --- /dev/null +++ b/cwl/lightning/readme.md @@ -0,0 +1,17 @@ +Running tiling workflow +=== + +Command +--- + +arvados-cwl-runner --submit --no-wait --project-uuid PROJECT_UUID fasta2numpy-wf.cwl INPUT_YML + +For examples of input yml files, see yml/fasta2numpy-wf-100test.yml and yml/fasta2numpy-wf-0831_0315.yml + +Notable parameters for input yml +--- + +fastadirs: an array of fasta directories; in our implementation, each directory consists of around 100 fasta pairs +batchsize: an integer determining the batch size when running the lightning-import step, e.g., for batchsize 12, we run lightning-import for 12 fasta directories together as a batch; the resulting libraries then get merged by lightning-slice +matchgenome: a string pattern used for obtaining a subset of the cohort, e.g., matchgenome "ADNI|WCAP" runs tiling for all samples with "ADNI" or "WCAP" in their name, matchgenome "" runs for the entire cohort +trainingsetsize: a float between 0 and 1 to determine the training set size diff --git a/cwl/lightning/src/genreadme.py b/cwl/lightning/src/genreadme.py new file mode 100755 index 0000000000..f5fc68927e --- /dev/null +++ b/cwl/lightning/src/genreadme.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +from __future__ import print_function +import os +import sys + +text = """h1. Data release readme + +Data release candidate: {} +Description: This dataset contains {} human genomes ({}) encoded using the Lightning tiling system for the AI4AD project.
It is published at {}. + +Collection contains: +* library_full/ -- Full Tiled Data Set +** matrix.0000.npy, matrix.0001.npy, matrix.0002.npy, ... -- tile variant# for each (sample, tag) +** chunk-tag-offset.csv -- tag offset for each matrix.NNNN.npy file +** samples.csv -- sample ID for each row of matrix.NNNN.npy +* library_filtered/ -- Filtered Tiled Data Set (filtered using chi-square test between tile variants and AD phenotype) +** onehot.npy -- one-hot representation of tiled data filtered by p-value +** onehot-columns.npy -- tag, variant, het/hom, p-value for each column of onehot.npy +** samples.csv -- sample ID for each row of onehot.npy +* GRCh38.86_library_annotation/ -- Annotations for Tiled Data Set +** GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz -- annotations for each genomic variant found in tiled dataset +** GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz.tbi -- index for annotations vcf +** GRCh38.86_library_summary.txt -- % of variants in each chromosome that were found in gnomad +** hg38.fa.gz.bed -- position of tile set in reference genome + +Tiling Background: + +Tiling abstracts a called genome by partitioning it into overlapping variable length shorter sequences, known as tiles. A tile is a genomic sequence that is braced on either side by 24 base (24-mer) "tags". + +A tile sequence must be at least 248 base pairs long where each tile is labeled with a "position" according to the number of tiles before it. One tile position can have multiple tile variants, one for each sequence observed at that position. When a variation occurs on a tag, we allow tile variants to span multiple steps where the tags would normally end. These tiles that span multiple steps are known as "spanning tiles" + +Our choice of tags ("tag-set") partition the human reference genome into 10,655,006 tiles, composed of 3.1 billion bases (with an average of around 315 bases per tile). The set of all positions and tile variants are stored in is what we call the tile library. 
An individual's genome can then be easily represented as an array of tag sets that reference tiles in the tile library. Each position in the array corresponds to a tile position and points to the tile variant observed at that position for that individual. + +To create the tiled genomes, we use Lightning, a system that allows for efficient access to large scale population genomic data with a focus on clinical and research use. The Lightning system is a combination of a conceptual way to think about genomes (genomic tiling), the internal representation of genomes for efficient access, and the software that manages access to the data. + +h2. Read me for library_full + +Directory: library_full/ + +Files: + +* matrix.XXX.npy: numpy-encoded matrix with one row per genome, and a pair of columns per tag / tile position (one for each allele). Each matrix element is an integer. For easier loading, the numpy matrix is broken into chunks. : +** -1 indicates a "low quality" tile variant containing no-calls. +** 0 indicates the tag for this tile was not found, i.e., this part of the genome is covered by a spanning tile in an earlier (leftward) column. +** Tile variants can span multiple tile positions if a tag is not found and are known as spanning tiles. +** 1 indicates the most common high quality variant of this tile in this dataset; 2 indicates the 2nd most common; etc. + +* chunk-tag-offset.csv - common separated text file that indicates tag offset for each matrix.NNNN.npy file +** Columns are file name and offset + +* samples.csv: mapping from numpy file (matrix.npy) and row number to input ID for each tiled genome +** Columns are row number, genome ID (usually taken from tile name of gvcf/vcf, and name of npy output + - Example: 0,"A-WCAP-WC000711-BL-COL-39141BL1","matrix.npy" + + +h2. 
Read me for library_filtered + +Directory: library_filtered/ +Files: +* onehot.npy -- +** The tile variants have been filtered using a chi2 filter between each separate tile variant and the AD phenotype. Only tile positions with 90% coverage are included (i.e. 90% of the tile variants in a tile position do not contain no-calls). +** Contains the positions of the non-zeros elements of the filtered sparse matrix.: two rows: 1) row position 2) column position +** This sparse numpy-encoded matrix has one row per genome, and a pair of columns per tile variant. One column represents the heterozygous tile variant (i.e. tile variant found in 1 allele) and one for homozygous tile variant (i.e. tile variant found in 2 alleles). Each matrix element is an integer with a 1 indicating the tile variant is present in that form and a 0 indicating the tile variant is not present in that format. +** Can create a sparse matrix with the following commands in python: + +import numpy as np +from scipy.sparse import csr_matrix + +Xrc = np.load('onehot.npy') +data = np.ones(Xrc[0,:].shape) +row_ind = Xrc[0,:] +col_ind = Xrc[1,:] +filtered = csr_matrix((data, (row_ind, col_ind))) + +* onehot-columns.npy - +numpy file containing information corresponding to each column of the one-hot matrix representation of the filtered data. +Columns are as follows: tag, tile variant, zygosity with heterozygous = 0 and homozygous = 1, p-value * 1e6 for each column of onehot.npy +* samples.csv -mapping from numpy file (matrix.npy) and row number to input ID for each tiled genome +** Columns are row number, genome ID (usually taken from tile name of gvcf/vcf, and name of npy output + - Example: 0,"A-WCAP-WC000711-BL-COL-39141BL1","matrix.npy" + + +h2. 
Read me for annotations + +Directory: GRCh38.86_library_annotation/ + +Files: +* GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz +** gzipped vcf of each genomic variant found in tile variants containing frequencies and other annotation details (gene, predicted effects, etc) from dbsnp and gnmad. +** ID contains both HGVS and rsID (if found) and INFO contains tile variant (TV: tileposition-tilevariant) as well as the other annotations. All tiles variants contains that genomic variant are listed in the TV field. +- Example: +- #CHROM POS ID REF ALT QUAL FILTER INFO +- chr9 45079 chr9:g.45080del;rs55984476 TC T . . TV=,5649728-1,;ANN=T|intergenic_region|MODIFIER|FAM138C-PGM5P3-AS1|ENSG00000218839-ENSG00000277631|intergenic_region|ENSG00000218839-ENSG00000277631|||n.45080delC||||||;AC=129535;AN=129536;AF=0.999992;AF_afr=0.999966;AF_amr=1;AF_asj=1;AF_eas=1;AF_fin=1;AF_nfe=1;AF_oth=1 +** In this annotation file, for simplicity the name of the chromosome is used instead of the proper HGVS annotation for the reference and chromosome. If you want to search the HGVS annotation you will need to replace it. + - Example: chr3:g.36130213T>A -> NC_000003.12:g.36130213T>A + - Example: chr10:g.13511587G>A -> NC_000010.12:g.13511587G>A + +* GRCh38.86_library_snpeff_dbsnp_gnomad.vcf.gz.tbi +** index file for vcf of annotations + +* GRCh38.86_library_summary.txt +** text file containing % of variants in each chromosome that were found in gnomad +* GRCh38.86_reference_tiles.bed +** bed file containing tile locations on GRCh38 for reference. +** The columns are as follows: +** 1) Chromosome +** 2) Tile start (including tag) +** 3) Tile end (including tag) +** 4) Tag # +** 5) Coverage (this gives a score 0-1000 of how many times this tile is placed in a set of genomes, 1000 means the tag is found in every genome of the set. 0 indicates the tag is not found in any of the genomes. Tag may not be placed due to variants or no-calls existing on the tag. 
+** 6) Strand (always ., included so that our bed file maintains the bed standard format) +** 7) Tile start (not including tag) +** 8) Tile end (not including tag +- Example: +M 0 467 10654109 870 . 0 443 +M 443 959 10654110 895 . 467 935 +M 935 1394 10654111 985 . 959 1370 +""" + +def count_samples(samplescsv): + count = 0 + with open(samplescsv) as f: + for line in f: + if line != "\n": + count += 1 + return count + +def main(): + samplescsv = sys.argv[1] + date = sys.argv[2] + description = sys.argv[3] + projecturl = sys.argv[4] + + cohortsize = count_samples(samplescsv) + print(text.format(date, cohortsize, description, projecturl)) + +if __name__ == '__main__': + main() diff --git a/cwl/lightning/stage-output.cwl b/cwl/lightning/stage-output.cwl new file mode 100644 index 0000000000..362e4dd170 --- /dev/null +++ b/cwl/lightning/stage-output.cwl @@ -0,0 +1,40 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +hints: + LoadListingRequirement: + loadListing: shallow_listing +inputs: + libname: string + npyfiles: File[] + onehotnpyfiles: File[] + pcapngs: File[] + bed: File + annotatedvcf: File + summary: File +outputs: + stagednpydir: Directory + stagedonehotnpydir: Directory + stagedannotationdir: Directory +expression: | + ${ + var stagednpydir = {"class": "Directory", + "basename": "library_full", + "listing": inputs.npyfiles}; + var stagedonehotnpydir = {"class": "Directory", + "basename": "library_filtered", + "listing": inputs.onehotnpyfiles}; + var annotationlist = inputs.pcapngs; + annotationlist.push(inputs.bed); + annotationlist.push(inputs.annotatedvcf); + annotationlist.push(inputs.summary); + var stagedannotationdir = {"class": "Directory", + "basename": inputs.libname+"_annotation", + "listing": annotationlist}; + return {"stagednpydir": stagednpydir, "stagedonehotnpydir": stagedonehotnpydir, 
"stagedannotationdir": stagedannotationdir}; + } diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml new file mode 100644 index 0000000000..97108e673c --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315.yml @@ -0,0 +1,353 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 12 +matchgenome_array: [""] +regions_nestedarray: + - - null +threads_array: [10] +mergeoutput_array: ["false"] +expandregions_array: [0] +phenotypesdir: + class: Directory + location: keep:2xpu4-4zz18-yq0njnojx7kaj34 +readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion_array: ["GRCh38.86"] +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479 + - class: Directory + location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479 + - class: Directory + location: keep:6a9d10762c98f71467957995f43a3227+138493 + - class: Directory + location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469 + - class: Directory + location: keep:e7a60ec6ce207229bb5dd628882b7145+138497 + - class: Directory + location: 
keep:01d219030897f7e1550c5aa5d4df7a35+137556 + - class: Directory + location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959 + - class: Directory + location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107 + - class: Directory + location: keep:64ca59668a2dcf61f351afcf350d42e7+138471 + - class: Directory + location: keep:62d89d039fad7221f16c34b722e097af+138465 + - class: Directory + location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503 + - class: Directory + location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459 + - class: Directory + location: keep:0eaa03f63440c825bc722022eac85448+138463 + - class: Directory + location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457 + - class: Directory + location: keep:cbf21c73503b4cb792b5231c46358ba3+138523 + - class: Directory + location: keep:11683cd5f820ff9d42d3888bbb00221f+138205 + - class: Directory + location: keep:8b09dae12292f375aa98de6f36be53c2+138491 + - class: Directory + location: keep:8d02abcdef3e1b3ab910303df4331399+137649 + - class: Directory + location: keep:c3884ba793bf5771ba7267246f747893+138479 + - class: Directory + location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479 + - class: Directory + location: keep:fa72acb0219d6833f90beb1e3874e485+138481 + - class: Directory + location: keep:01e5edc00295926a91351e8069cb144f+138477 + - class: Directory + location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048 + - class: Directory + location: keep:66d688c08060cd9750fb6e46cce3fa39+138479 + - class: Directory + location: keep:9c78e31f255406bee37d8960ce79c185+138471 + - class: Directory + location: keep:100591a332d9f256842f086fa92177dc+138735 + - class: Directory + location: keep:ef10e9c7591c5466d5791666f830bf06+138725 + - class: Directory + location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687 + - class: Directory + location: keep:ffa0d2888d14b2940562801bc163d4b2+138667 + - class: Directory + location: keep:98854a1e4db58922e5e712507217e2b5+138483 + - class: Directory + location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479 + - 
class: Directory + location: keep:381b930fc4059b819bda2c414927deaf+138477 + - class: Directory + location: keep:558b093245095d1029f4cbe1f2726e99+138056 + - class: Directory + location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479 + - class: Directory + location: keep:95aba9cd1cce2de6fbc6c5851c81387d+138479 + - class: Directory + location: keep:94328ab7b661267810f9f0197eae70da+138479 + - class: Directory + location: keep:666124ae8a9a6734bba5de48490748c0+138479 + - class: Directory + location: keep:98359aa9baa931eea204298f7b26563c+138479 + - class: Directory + location: keep:c061711a7b7e7d8acddfde36e785cc77+138479 + - class: Directory + location: keep:ac21ed8a0e6af91debc39ddee1197787+138503 + - class: Directory + location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357 + - class: Directory + location: keep:0e6d1ff3738abd240efaca9079f62f46+138851 + - class: Directory + location: keep:ed82b93d0ca76ffd666457399c2462cd+138101 + - class: Directory + location: keep:7efb683d9556c87c69fb1cbbaa290820+137674 + - class: Directory + location: keep:377b8665f04091581e560614ba20ca46+138515 + - class: Directory + location: keep:da7207cc977b12f60372b742785eec62+138491 + - class: Directory + location: keep:54403688627695480b373eb354dc1c7f+138479 + - class: Directory + location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489 + - class: Directory + location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093 + - class: Directory + location: keep:661cd8f54deabacb49ec78db93ed4578+138479 + - class: Directory + location: keep:b326e6b7773354041b03dfe4564ffd0b+137095 + - class: Directory + location: keep:853632bf7879231f9ac24096e52f95e1+138479 + - class: Directory + location: keep:f190f2167bcd4fe819878efcde43e6f0+138059 + - class: Directory + location: keep:51f70f5861f61aca2e441e2c5d282150+138479 + - class: Directory + location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479 + - class: Directory + location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479 + - class: Directory + location: 
keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477 + - class: Directory + location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485 + - class: Directory + location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580 + - class: Directory + location: keep:f0322067b85babcae2ecdd19d34f7597+138478 + - class: Directory + location: keep:47aa083e8d0a6c4c297d036165902282+138478 + - class: Directory + location: keep:9f9d01fd058a06f30955f6b9709194fb+138531 + - class: Directory + location: keep:f382e81786847d7395684e858786fc62+138479 + - class: Directory + location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479 + - class: Directory + location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059 + - class: Directory + location: keep:6346d82f0746faee45f5ba194dd25d0b+138479 + - class: Directory + location: keep:046da89ef991cc019f5092c326e2b8f8+138479 + - class: Directory + location: keep:52bdef8f26c69eb338d5d94b08cce125+138479 + - class: Directory + location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599 + - class: Directory + location: keep:fa21ef0cd50701250a927b4993df53e7+138297 + - class: Directory + location: keep:a333d969b2b84dfe7a2abb5889763c26+138675 + - class: Directory + location: keep:5fcb253d3584035246f5b4bcd60fa348+137291 + - class: Directory + location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209 + - class: Directory + location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402 + - class: Directory + location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783 + - class: Directory + location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879 + - class: Directory + location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187 + - class: Directory + location: keep:55ad863fef7d8dccf4db32f282f752c4+135119 + - class: Directory + location: keep:30e016d511a2443985d2b051638a419b+138047 + - class: Directory + location: keep:2711053e417e15034cae0c8aec568a45+138352 + - class: Directory + location: keep:020cddfa8082c5476c011aaa189cf518+138479 + - class: Directory + location: keep:5bb2df76b0869bea8833784a934f7021+138479 + - 
class: Directory + location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479 + - class: Directory + location: keep:6e1bf98f088ced1ef8b60f562374325b+138479 + - class: Directory + location: keep:153d189544f36dd39610087a782589ca+138479 + - class: Directory + location: keep:ff71aa46f8f4134f1556d4b19d0b9307+138058 + - class: Directory + location: keep:e996083281665242208b0773b0d6287c+138058 + - class: Directory + location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479 + - class: Directory + location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479 + - class: Directory + location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479 + - class: Directory + location: keep:e08ebbc428bc0604e07f72b21410ce58+138479 + - class: Directory + location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479 + - class: Directory + location: keep:f4318c61400198c3d44895e184e79b4c+138479 + - class: Directory + location: keep:5a0d263b7469871268e31919899e6103+138445 + - class: Directory + location: keep:0c5a32dc706c195619b3f84bac5d3978+136353 + - class: Directory + location: keep:3843ae092703f6696cb5f42f464f4583+134452 + - class: Directory + location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783 + - class: Directory + location: keep:570f09f532cbd74146bd8ba3cc363263+137749 + - class: Directory + location: keep:ec659571f3a78e63e457797fc58f1828+137677 + - class: Directory + location: keep:60da5cf26a882d1b4e024326cd7d893b+130004 + - class: Directory + location: keep:13f6862e9770f63bd44b6ef539541dc3+138079 + - class: Directory + location: keep:83fb84b000e58c16fe17adef5de277b1+138079 + - class: Directory + location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079 + - class: Directory + location: keep:fc49628c14a44abf3e54c4a956aa5888+136845 + - class: Directory + location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089 + - class: Directory + location: keep:c08e22c6705b07111205c0edc2c3872d+136586 + - class: Directory + location: keep:c577425ff9f3a0d886f3426065a72ca4+137691 + - class: Directory + location: 
keep:ddf235932a0e2176593aac5a4ac42861+136337 + - class: Directory + location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731 + - class: Directory + location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879 + - class: Directory + location: keep:838fd46abf759e68bdc17f29d1680752+138063 + - class: Directory + location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665 + - class: Directory + location: keep:3dfbeda6c564f9f275f74c694022b52d+135167 + - class: Directory + location: keep:94de36359231b01ef2e277083bfe5287+137677 + - class: Directory + location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845 + - class: Directory + location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170 + - class: Directory + location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678 + - class: Directory + location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679 + - class: Directory + location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665 + - class: Directory + location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362 + - class: Directory + location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859 + - class: Directory + location: keep:9acd969c8b08cedce1fd8d769d214b75+137559 + - class: Directory + location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977 + - class: Directory + location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161 + - class: Directory + location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569 + - class: Directory + location: keep:dfed457efee3414f1c1f7536fe83eed3+109552 + - class: Directory + location: keep:14388f62c868254149d5972c7d80d28d+97366 + - class: Directory + location: keep:5e99d1ef6b90001e01450ffe093e9493+135278 + - class: Directory + location: keep:153533e237092985535d7f9b0a4a354e+135278 + - class: Directory + location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278 + - class: Directory + location: keep:281ec2620a88b7274b44ece0ca96b543+135278 + - class: Directory + location: keep:9d2dfc3510b82e5249045e71862a44d9+135278 + - class: Directory + location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194 + - 
class: Directory + location: keep:d80d9356d64396465ca61585b4c3031b+135419 + - class: Directory + location: keep:881e67efac709867dafc480116c2edf1+138479 + - class: Directory + location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479 + - class: Directory + location: keep:6a671736ca4bf130e3ab507b702024bc+136672 + - class: Directory + location: keep:504c77a1298ff2649af35527dec8b467+138058 + - class: Directory + location: keep:e19f2c238e685f62f52c58eb4c44d703+134867 + - class: Directory + location: keep:afad6e6fd6933d8919a4a39712e80afe+67509 + - class: Directory + location: keep:09771439ce1431d168b15bb36cc8cacb+136998 + - class: Directory + location: keep:cb3b59ec7468d27ef643380568f945e4+138679 + - class: Directory + location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679 + - class: Directory + location: keep:52d413d47ebc5abaeda098254ce6d517+138629 + - class: Directory + location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059 + - class: Directory + location: keep:d081f62909038dd25c499972547ced53+138479 + - class: Directory + location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975 + - class: Directory + location: keep:9882f01b1cc6172959a3efab49d89397+78041 diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml new file mode 100644 index 0000000000..a2aa729287 --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-0831_0315_ADNI.yml @@ -0,0 +1,353 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 12 +matchgenome_array: ["ADNI"] +regions_nestedarray: + - - null +threads_array: [10] +mergeoutput_array: ["false"] +expandregions_array: [0] +phenotypesdir: + class: Directory + location: keep:2xpu4-4zz18-yq0njnojx7kaj34 +readmeinfo: ["2022-03-22", "called using VCPA1.1 pipeline, ADNI only", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion_array: ["GRCh38.86"] +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479 + - class: Directory + location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479 + - class: Directory + location: keep:6a9d10762c98f71467957995f43a3227+138493 + - class: Directory + location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469 + - class: Directory + location: keep:e7a60ec6ce207229bb5dd628882b7145+138497 + - class: Directory + location: keep:01d219030897f7e1550c5aa5d4df7a35+137556 + - class: Directory + location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959 + - class: Directory + location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107 + - class: Directory + location: keep:64ca59668a2dcf61f351afcf350d42e7+138471 + - class: Directory + location: keep:62d89d039fad7221f16c34b722e097af+138465 + 
- class: Directory + location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503 + - class: Directory + location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459 + - class: Directory + location: keep:0eaa03f63440c825bc722022eac85448+138463 + - class: Directory + location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457 + - class: Directory + location: keep:cbf21c73503b4cb792b5231c46358ba3+138523 + - class: Directory + location: keep:11683cd5f820ff9d42d3888bbb00221f+138205 + - class: Directory + location: keep:8b09dae12292f375aa98de6f36be53c2+138491 + - class: Directory + location: keep:8d02abcdef3e1b3ab910303df4331399+137649 + - class: Directory + location: keep:c3884ba793bf5771ba7267246f747893+138479 + - class: Directory + location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479 + - class: Directory + location: keep:fa72acb0219d6833f90beb1e3874e485+138481 + - class: Directory + location: keep:01e5edc00295926a91351e8069cb144f+138477 + - class: Directory + location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048 + - class: Directory + location: keep:66d688c08060cd9750fb6e46cce3fa39+138479 + - class: Directory + location: keep:9c78e31f255406bee37d8960ce79c185+138471 + - class: Directory + location: keep:100591a332d9f256842f086fa92177dc+138735 + - class: Directory + location: keep:ef10e9c7591c5466d5791666f830bf06+138725 + - class: Directory + location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687 + - class: Directory + location: keep:ffa0d2888d14b2940562801bc163d4b2+138667 + - class: Directory + location: keep:98854a1e4db58922e5e712507217e2b5+138483 + - class: Directory + location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479 + - class: Directory + location: keep:381b930fc4059b819bda2c414927deaf+138477 + - class: Directory + location: keep:558b093245095d1029f4cbe1f2726e99+138056 + - class: Directory + location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479 + - class: Directory + location: keep:95aba9cd1cce2de6fbc6c5851c81387d+138479 + - class: Directory + location: 
keep:94328ab7b661267810f9f0197eae70da+138479 + - class: Directory + location: keep:666124ae8a9a6734bba5de48490748c0+138479 + - class: Directory + location: keep:98359aa9baa931eea204298f7b26563c+138479 + - class: Directory + location: keep:c061711a7b7e7d8acddfde36e785cc77+138479 + - class: Directory + location: keep:ac21ed8a0e6af91debc39ddee1197787+138503 + - class: Directory + location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357 + - class: Directory + location: keep:0e6d1ff3738abd240efaca9079f62f46+138851 + - class: Directory + location: keep:ed82b93d0ca76ffd666457399c2462cd+138101 + - class: Directory + location: keep:7efb683d9556c87c69fb1cbbaa290820+137674 + - class: Directory + location: keep:377b8665f04091581e560614ba20ca46+138515 + - class: Directory + location: keep:da7207cc977b12f60372b742785eec62+138491 + - class: Directory + location: keep:54403688627695480b373eb354dc1c7f+138479 + - class: Directory + location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489 + - class: Directory + location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093 + - class: Directory + location: keep:661cd8f54deabacb49ec78db93ed4578+138479 + - class: Directory + location: keep:b326e6b7773354041b03dfe4564ffd0b+137095 + - class: Directory + location: keep:853632bf7879231f9ac24096e52f95e1+138479 + - class: Directory + location: keep:f190f2167bcd4fe819878efcde43e6f0+138059 + - class: Directory + location: keep:51f70f5861f61aca2e441e2c5d282150+138479 + - class: Directory + location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479 + - class: Directory + location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479 + - class: Directory + location: keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477 + - class: Directory + location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485 + - class: Directory + location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580 + - class: Directory + location: keep:f0322067b85babcae2ecdd19d34f7597+138478 + - class: Directory + location: keep:47aa083e8d0a6c4c297d036165902282+138478 + - 
class: Directory + location: keep:9f9d01fd058a06f30955f6b9709194fb+138531 + - class: Directory + location: keep:f382e81786847d7395684e858786fc62+138479 + - class: Directory + location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479 + - class: Directory + location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059 + - class: Directory + location: keep:6346d82f0746faee45f5ba194dd25d0b+138479 + - class: Directory + location: keep:046da89ef991cc019f5092c326e2b8f8+138479 + - class: Directory + location: keep:52bdef8f26c69eb338d5d94b08cce125+138479 + - class: Directory + location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599 + - class: Directory + location: keep:fa21ef0cd50701250a927b4993df53e7+138297 + - class: Directory + location: keep:a333d969b2b84dfe7a2abb5889763c26+138675 + - class: Directory + location: keep:5fcb253d3584035246f5b4bcd60fa348+137291 + - class: Directory + location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209 + - class: Directory + location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402 + - class: Directory + location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783 + - class: Directory + location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879 + - class: Directory + location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187 + - class: Directory + location: keep:55ad863fef7d8dccf4db32f282f752c4+135119 + - class: Directory + location: keep:30e016d511a2443985d2b051638a419b+138047 + - class: Directory + location: keep:2711053e417e15034cae0c8aec568a45+138352 + - class: Directory + location: keep:020cddfa8082c5476c011aaa189cf518+138479 + - class: Directory + location: keep:5bb2df76b0869bea8833784a934f7021+138479 + - class: Directory + location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479 + - class: Directory + location: keep:6e1bf98f088ced1ef8b60f562374325b+138479 + - class: Directory + location: keep:153d189544f36dd39610087a782589ca+138479 + - class: Directory + location: keep:ff71aa46f8f4134f1556d4b19d0b9307+138058 + - class: Directory + location: 
keep:e996083281665242208b0773b0d6287c+138058 + - class: Directory + location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479 + - class: Directory + location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479 + - class: Directory + location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479 + - class: Directory + location: keep:e08ebbc428bc0604e07f72b21410ce58+138479 + - class: Directory + location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479 + - class: Directory + location: keep:f4318c61400198c3d44895e184e79b4c+138479 + - class: Directory + location: keep:5a0d263b7469871268e31919899e6103+138445 + - class: Directory + location: keep:0c5a32dc706c195619b3f84bac5d3978+136353 + - class: Directory + location: keep:3843ae092703f6696cb5f42f464f4583+134452 + - class: Directory + location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783 + - class: Directory + location: keep:570f09f532cbd74146bd8ba3cc363263+137749 + - class: Directory + location: keep:ec659571f3a78e63e457797fc58f1828+137677 + - class: Directory + location: keep:60da5cf26a882d1b4e024326cd7d893b+130004 + - class: Directory + location: keep:13f6862e9770f63bd44b6ef539541dc3+138079 + - class: Directory + location: keep:83fb84b000e58c16fe17adef5de277b1+138079 + - class: Directory + location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079 + - class: Directory + location: keep:fc49628c14a44abf3e54c4a956aa5888+136845 + - class: Directory + location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089 + - class: Directory + location: keep:c08e22c6705b07111205c0edc2c3872d+136586 + - class: Directory + location: keep:c577425ff9f3a0d886f3426065a72ca4+137691 + - class: Directory + location: keep:ddf235932a0e2176593aac5a4ac42861+136337 + - class: Directory + location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731 + - class: Directory + location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879 + - class: Directory + location: keep:838fd46abf759e68bdc17f29d1680752+138063 + - class: Directory + location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665 + - 
class: Directory + location: keep:3dfbeda6c564f9f275f74c694022b52d+135167 + - class: Directory + location: keep:94de36359231b01ef2e277083bfe5287+137677 + - class: Directory + location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845 + - class: Directory + location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170 + - class: Directory + location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678 + - class: Directory + location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679 + - class: Directory + location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665 + - class: Directory + location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362 + - class: Directory + location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859 + - class: Directory + location: keep:9acd969c8b08cedce1fd8d769d214b75+137559 + - class: Directory + location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977 + - class: Directory + location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161 + - class: Directory + location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569 + - class: Directory + location: keep:dfed457efee3414f1c1f7536fe83eed3+109552 + - class: Directory + location: keep:14388f62c868254149d5972c7d80d28d+97366 + - class: Directory + location: keep:5e99d1ef6b90001e01450ffe093e9493+135278 + - class: Directory + location: keep:153533e237092985535d7f9b0a4a354e+135278 + - class: Directory + location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278 + - class: Directory + location: keep:281ec2620a88b7274b44ece0ca96b543+135278 + - class: Directory + location: keep:9d2dfc3510b82e5249045e71862a44d9+135278 + - class: Directory + location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194 + - class: Directory + location: keep:d80d9356d64396465ca61585b4c3031b+135419 + - class: Directory + location: keep:881e67efac709867dafc480116c2edf1+138479 + - class: Directory + location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479 + - class: Directory + location: keep:6a671736ca4bf130e3ab507b702024bc+136672 + - class: Directory + location: 
keep:504c77a1298ff2649af35527dec8b467+138058 + - class: Directory + location: keep:e19f2c238e685f62f52c58eb4c44d703+134867 + - class: Directory + location: keep:afad6e6fd6933d8919a4a39712e80afe+67509 + - class: Directory + location: keep:09771439ce1431d168b15bb36cc8cacb+136998 + - class: Directory + location: keep:cb3b59ec7468d27ef643380568f945e4+138679 + - class: Directory + location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679 + - class: Directory + location: keep:52d413d47ebc5abaeda098254ce6d517+138629 + - class: Directory + location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059 + - class: Directory + location: keep:d081f62909038dd25c499972547ced53+138479 + - class: Directory + location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975 + - class: Directory + location: keep:9882f01b1cc6172959a3efab49d89397+78041 diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml new file mode 100644 index 0000000000..452681c702 --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-A-PRHS-PR000971-BL-COL-47620BL1.yml @@ -0,0 +1,59 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 12 +matchgenome_array: [""] +regions_nestedarray: + - - null +threads_array: [10] +mergeoutput_array: ["false"] +expandregions_array: [0] +phenotypesdir: + class: Directory + location: keep:2xpu4-4zz18-yq0njnojx7kaj34 +readmeinfo: ["2022-03-22", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion_array: ["GRCh38.86"] +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:22524bac46f1363efaadea1d845f8c90+1442 diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-public.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-public.yml new file mode 100644 index 0000000000..2c03b43916 --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-public.yml @@ -0,0 +1,327 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 12 +matchgenome_array: [""] +regions_nestedarray: + - - null +threads_array: [10] +mergeoutput_array: ["false"] +expandregions_array: [0] +phenotypesdir: + class: Directory + location: keep:2xpu4-4zz18-yq0njnojx7kaj34 +readmeinfo: ["2022-08-27", "public data", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion_array: ["GRCh38.86"] +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:f70e65326f36502319b520e0abe532a4+86845 + - class: Directory + location: keep:2ae6287169aab85ce3d4b62565aef7b7+86852 + - class: Directory + location: keep:5ba1f0e9a8b99768414d50020ce171d1+86860 + - class: Directory + location: keep:ec4b7b56b8f40bcdd106e0a9b343eb0b+86858 + - class: Directory + location: keep:354e180b1dcca07e1afc798ac6e46170+86854 + - class: Directory + location: keep:da769c6fd883225ed10320e8d0a8891c+20975 + - class: Directory + location: keep:6993bbbb53913aab66a11c004fad3817+21088 + - class: Directory + location: keep:c586e7a0b300ab3c59ec622b6b4eb43d+21087 + - class: Directory + location: keep:6f9199c7e66c0aa72b570855500ba8bd+20984 + - class: Directory + location: keep:bb22967697aac47adc7b4139bcc90311+20970 + - class: Directory + location: 
keep:d8b9d7a259e6674c9dcb3c410b8aba93+86847 + - class: Directory + location: keep:c0c62e1429a60268728fe980b06d1cd4+78425 + - class: Directory + location: keep:392860853b0525b9af785ade7553ec1a+86859 + - class: Directory + location: keep:348d9ad2b199ac1969776bc824bd5678+86850 + - class: Directory + location: keep:8524d2e65adaf66619385f4ce5b7d12c+86856 + - class: Directory + location: keep:fed854d8ad8d272d80f0f1727526c0b0+86850 + - class: Directory + location: keep:fd9672a8eed704079acb034aa3a9ff39+86851 + - class: Directory + location: keep:12976e792efa5b41bae0d1747dcfae9f+86838 + - class: Directory + location: keep:d479e7f2068dfb99bc77a5fb89974aa6+86843 + - class: Directory + location: keep:c6df9ee5438b0c0375dcc36596a55a08+21037 + - class: Directory + location: keep:a8ef8420e1349d35bd8c561897c69d07+21037 + - class: Directory + location: keep:ae21ed97df861b3b6d7ab06fdb791853+21037 + - class: Directory + location: keep:3e7237716e8b2abbb3201d58127d7fad+21037 + - class: Directory + location: keep:804223776db91718ef3950f035748aab+20923 + - class: Directory + location: keep:b692a2d5743866421fef3b59b5459ae7+21037 + - class: Directory + location: keep:b849f97ef209feacae0a9e2b01f76256+21037 + - class: Directory + location: keep:8a0d511e5386be90200c7f1a41494c6b+21511 + - class: Directory + location: keep:47a99ccd72dfcee0d71686605b0aa30d+21037 + - class: Directory + location: keep:b91d899117aedad5e83723046bcb85cd+21037 + - class: Directory + location: keep:56c00d8b4eba69b6232135fa35362e95+21037 + - class: Directory + location: keep:b1fa3eb3e3ea592172cf2dd0400713ec+21037 + - class: Directory + location: keep:9d26ff7a6725f4295ed2b8e54dfc0a9e+21037 + - class: Directory + location: keep:9a64e0e79c49d8d564d5268c830ac7a4+86837 + - class: Directory + location: keep:388c45e48909fac3e3341d421497c882+20872 + - class: Directory + location: keep:c33f40e0fff14cebfdacfecb932a2d16+21037 + - class: Directory + location: keep:d280663b4ffcf7fc9cddcbd3ebbc0ecc+21037 + - class: Directory + 
location: keep:4db992a5512733f46a4c26d8a1ae47be+21440 + - class: Directory + location: keep:37f6bfa92ce9dd08e34d62edb6051603+86838 + - class: Directory + location: keep:84a070c697ceec31000bf24de2cc65ba+21037 + - class: Directory + location: keep:b8bcb829ee3a883bc5f4ed7e35a5d127+21037 + - class: Directory + location: keep:b6c079b2a6dec889c79f4072737f8ba1+20164 + - class: Directory + location: keep:713f9b8dd6ee5c1d5c82dacbd0ce8e65+21037 + - class: Directory + location: keep:55dbfb376eef1a2d6547d85b3604ae96+21037 + - class: Directory + location: keep:f796cb3f2c9f96b94f707ad3aa42a57d+21037 + - class: Directory + location: keep:e65dea3f9460d946abdfdd323c302e9f+21037 + - class: Directory + location: keep:ec7268728cc5f293f34301044306acd2+20985 + - class: Directory + location: keep:fdf43161f94ef9171a8c1963e92b47e3+21037 + - class: Directory + location: keep:5f1fecbc66525f0924ced98bcae5c0ca+21523 + - class: Directory + location: keep:66e5991fa0f6c39648c4c2fc33114ddf+21037 + - class: Directory + location: keep:c57f6713ae289b6534996aceb032bb42+20586 + - class: Directory + location: keep:8a49b79303015e7381b25991b18df204+21037 + - class: Directory + location: keep:9a65a4d32f1d948a08212dd1b6e73512+21037 + - class: Directory + location: keep:9fcfdf94365cbff9bccc2f7ed75d951e+21037 + - class: Directory + location: keep:45d0e870bd2d26d5b34b1aa65d8e48b5+21037 + - class: Directory + location: keep:6a02e00dcfe7dda5ce7a68b0cd547d4b+21037 + - class: Directory + location: keep:e887895e2d866e4ce98a56ae8edfda86+20978 + - class: Directory + location: keep:f09b0c7feb0f70c76f28471c9b45bdfc+21513 + - class: Directory + location: keep:578d8a6e623d7b9c36c85aabc08fc11d+21037 + - class: Directory + location: keep:4937ca42877ff56d32f0155d03c72721+20421 + - class: Directory + location: keep:43779f0116bf633ca757c029bc72c283+7877 + - class: Directory + location: keep:a9a04ce62720f93c97ac292f3a0fc565+21037 + - class: Directory + location: keep:ae042538726c56c73158ec979a3aab78+17520 + - class: Directory 
+ location: keep:132fd51727f13ffc5d146e2b5879bc0a+21037 + - class: Directory + location: keep:b70d5cd30bec2518977dd0f4264c31c6+21037 + - class: Directory + location: keep:9d65f08e988b2e1099a7484fbded50f0+21037 + - class: Directory + location: keep:f4f743252457b49968b36b87e2d892de+21037 + - class: Directory + location: keep:b9846a7d4328a5d5ae489ddf2fdcbd77+20923 + - class: Directory + location: keep:00c9233f5bbd92975019dfa9d7b85d95+21037 + - class: Directory + location: keep:c67daae4ad6e6dbb723da66b06852b40+21519 + - class: Directory + location: keep:14a37d50a3be9facfa187e88d5d2181e+21037 + - class: Directory + location: keep:76db97f00e45b2d1bcd802a27c12cc8f+21079 + - class: Directory + location: keep:82e07421ee8c32d40f23a4563f8fb75e+21442 + - class: Directory + location: keep:35a7ea114db1b31d210a482f314faaa0+21037 + - class: Directory + location: keep:67438d55eae3831633b8203732bb16e9+21037 + - class: Directory + location: keep:d1e690b74aa49248719c58c4d0ba79b3+21037 + - class: Directory + location: keep:551267f171357ef3fbbdde531ded3f2c+21037 + - class: Directory + location: keep:c7b5f19064c6565020cd3fba15d8afb4+20927 + - class: Directory + location: keep:a95ca99d6bc784b0e8fad8c80e526b5f+21037 + - class: Directory + location: keep:d64d3dd3f11f1611efa23baa4994e87a+21037 + - class: Directory + location: keep:7df93e28b40be1740f6c4ca510b2970f+21443 + - class: Directory + location: keep:5e1dad08f1139addc7e1c21b69a5c2a8+21037 + - class: Directory + location: keep:3206c2a7abd99a59e86bab81fb4e3754+21518 + - class: Directory + location: keep:41bb1ba630a91f1c599a4890380caf80+86846 + - class: Directory + location: keep:9614163f41df1585913d71526592004b+20757 + - class: Directory + location: keep:db85f1601caadbdff8119d9e35d19cdb+86851 + - class: Directory + location: keep:756e93603f4c4e6a5c8d36c48b191d19+21037 + - class: Directory + location: keep:9cb7b46a661eda4b9255714a31eaee1c+21037 + - class: Directory + location: keep:fa0402547d3a2ef224c8cec0a730ce32+21037 + - class: 
Directory + location: keep:2e21c7cfd65354721f78606c987e449c+21037 + - class: Directory + location: keep:a5296a9562fba84c7fefc57892addb2f+20978 + - class: Directory + location: keep:1a17c7abc302e83a6bdda1dfc0f11f61+21037 + - class: Directory + location: keep:64f07308131a03f5b93430a18a248dba+21528 + - class: Directory + location: keep:7be8cdbe73c0717629a2c58abfb1a0a2+21037 + - class: Directory + location: keep:9daea4925475221439048da5c97b80f9+21037 + - class: Directory + location: keep:8a107d64077d26e39d3cd0900ddebb4e+21444 + - class: Directory + location: keep:cee972e2fb687639320459497292eccf+20756 + - class: Directory + location: keep:722874b6ddd947bbf34d11abdb4271bf+21037 + - class: Directory + location: keep:d1b77ec53bdc9d9ac0ec0decd897e0fc+21037 + - class: Directory + location: keep:f99b35068e9972e7bf893f01585c3231+21037 + - class: Directory + location: keep:596a0a3a2b58ad354b9facd67c4f93c8+21037 + - class: Directory + location: keep:56085a1717dbaa43feff2f1ed2d6ae7d+21436 + - class: Directory + location: keep:0983df98a1d602a4434cbe327921ee42+21037 + - class: Directory + location: keep:26b43654a8d419d40b197bd06ab438d9+21037 + - class: Directory + location: keep:605b33acec8750852b8201e0ae98cf94+20923 + - class: Directory + location: keep:8ee6e30a40156e4692328a56dcc06327+21521 + - class: Directory + location: keep:a3a7bb8b4c99611fc152dbf321ee0a5b+21037 + - class: Directory + location: keep:21ef9096b769c5ae945a24b7eaab7c52+5418 + - class: Directory + location: keep:bb9f328605f0f597cecd4992c4cc91ea+21037 + - class: Directory + location: keep:1fbd348efbfdcff58423cc808520490a+20787 + - class: Directory + location: keep:ad6b5b7935f86ce26995b6b8b127e04b+21037 + - class: Directory + location: keep:41f10a76531d35d29bdf45c561d19678+21037 + - class: Directory + location: keep:80961e5505ecb0079c4477ed0001b21f+21037 + - class: Directory + location: keep:1aaf3b528c72d94732c1cf18e047bca9+20922 + - class: Directory + location: keep:7e338ffddacf4e247e03a4836856e585+21519 + - 
class: Directory + location: keep:5860af9f92b6759227c4ab8a0bf8c463+21037 + - class: Directory + location: keep:f87e293e0771107c2ed0231cef1dc331+21037 + - class: Directory + location: keep:f8b88a16a2521531b8bb2a5de8043f68+21533 + - class: Directory + location: keep:07d825d8cc3ee5e60aa365360e1052e5+21037 + - class: Directory + location: keep:ac2cf9dbf715d2f3d7c56bf558c94214+21037 + - class: Directory + location: keep:7709b885a502ce29a28384550a7ab40a+21037 + - class: Directory + location: keep:39203af39d34284eee108dc966ee46f4+20978 + - class: Directory + location: keep:db75f38419f25ad2ecb46c86e3f17d72+21037 + - class: Directory + location: keep:3f75ed86079ce6f9c502609896adb7c1+21037 + - class: Directory + location: keep:7288a896d67f4e8151b2ce66743da086+21523 + - class: Directory + location: keep:10b80de1f92992b056e7068d81321b3b+21037 + - class: Directory + location: keep:465f9957778cea2b0173667ad6c2cb68+21037 + - class: Directory + location: keep:8bad3d6a943df3c7e9d37422eabf78d0+20979 + - class: Directory + location: keep:7013b729faee9bcc20ef702568869fd9+21037 + - class: Directory + location: keep:6581830f52d747d04fa6bba5a13fbf0e+21519 + - class: Directory + location: keep:b7b0ad7d8c9f9953183e151928a7c9c0+21467 + - class: Directory + location: keep:af2a0fbfc4c3176fc1f7f5cf218a90de+21521 + - class: Directory + location: keep:209a1c955c53c10d53384f75c6d252e8+21444 + - class: Directory + location: keep:5311e8a7770129c08dd16a41ed94c5eb+21441 + - class: Directory + location: keep:aa08b00b2270276ab21a7923e7585050+21449 + - class: Directory + location: keep:e0ac7e2ab8c6d35e3d8a2603c9aa37a5+21452 diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-test.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-test.yml new file mode 100644 index 0000000000..2d6e0aa4fd --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-test.yml @@ -0,0 +1,41 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:3cf4b6ed2bf8cd3abc27cb5a79641a86+755 + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 3 +fastadirs: + - class: Directory + location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv + - class: Directory + location: keep:2xpu4-4zz18-bwlg17dkckptebn + - class: Directory + location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb + - class: Directory + location: keep:2xpu4-4zz18-i4gi8cidwnaemt3 + - class: Directory + location: keep:2xpu4-4zz18-1vwxurck9m902n0 + - class: Directory + location: keep:2xpu4-4zz18-yge3zutgmgecgtl +matchgenome_array: ["ACT", "ACT|ADC", ""] +regions_nestedarray: + - - class: File + location: keep:bc2a62baf6698c2d58e6224f851884b8+224/gencode.v37lift37.annotation.gff3.gz + - class: File + location: keep:0b74ce016766e93f7b7292fe28ea8e25+70/PMC3896259.gff3.gz + - null + - - class: File + location: keep:bc2a62baf6698c2d58e6224f851884b8+224/gencode.v37.annotation.gff3.gz + - class: File + location: keep:dd5ab297413d698a39541aeadfa0f26f+70/PMC3896259.gff3.gz + - null +threads_array: [80, 10, 10] +mergeoutput_array: ["true", "true", "false"] +expandregions_array: [1000000, 1000000, 0] diff --git a/cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml b/cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml new file mode 100644 index 0000000000..ce6b406745 --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-multi-wf-testonehot.yml @@ -0,0 +1,68 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdirs: + - class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 3 +fastadirs: + - class: Directory + location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv + - class: Directory + location: keep:2xpu4-4zz18-bwlg17dkckptebn + - class: Directory + location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb + - class: Directory + location: keep:2xpu4-4zz18-i4gi8cidwnaemt3 + - class: Directory + location: keep:2xpu4-4zz18-1vwxurck9m902n0 + - class: Directory + location: keep:2xpu4-4zz18-yge3zutgmgecgtl +matchgenome_array: [""] +regions_nestedarray: + - - null +threads_array: [10] +mergeoutput_array: ["false"] +expandregions_array: [0] +phenotypesdir: + class: Directory + location: keep:2xpu4-4zz18-yq0njnojx7kaj34 +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion_array: ["GRCh38.86"] +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 diff --git a/cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml b/cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml new file mode 100644 index 0000000000..a71027af9c --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-wf-0831_0315.yml @@ -0,0 +1,357 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdir: + class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 12 +matchgenome: "" +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesnofamilydir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 +phenotypesdir: + class: Directory + location: keep:8508667def6057f0bbf0ab4f751d8b05+205 +trainingsetsize: 0.8 +randomseed: 0 +pcacomponents: 10 +readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479 + - class: Directory + location: keep:5f4e90fbbdbc9496bc80d68a0ed3c082+138479 + - class: Directory + location: keep:6a9d10762c98f71467957995f43a3227+138493 + - class: Directory + location: keep:8803e18d1c5d3655e4c65a67fcde261f+138469 + - class: Directory + location: keep:e7a60ec6ce207229bb5dd628882b7145+138497 + - class: Directory + location: keep:01d219030897f7e1550c5aa5d4df7a35+137556 + - class: Directory + location: keep:afbfe6c1d7d751607c47de7dcc3b839e+137959 + - class: Directory + location: keep:8eff0f12f4b3c1fdc4332326c2ea6674+137107 + - class: Directory + location: keep:64ca59668a2dcf61f351afcf350d42e7+138471 + - 
class: Directory + location: keep:62d89d039fad7221f16c34b722e097af+138465 + - class: Directory + location: keep:5a71084d1947ce8f9454b4b2e0796b41+138503 + - class: Directory + location: keep:69c4e14981608d6be9aeaf6da94aebfb+138459 + - class: Directory + location: keep:0eaa03f63440c825bc722022eac85448+138463 + - class: Directory + location: keep:e167599ed16c7b1f7bff217fc5d80a4b+138457 + - class: Directory + location: keep:cbf21c73503b4cb792b5231c46358ba3+138523 + - class: Directory + location: keep:11683cd5f820ff9d42d3888bbb00221f+138205 + - class: Directory + location: keep:8b09dae12292f375aa98de6f36be53c2+138491 + - class: Directory + location: keep:8d02abcdef3e1b3ab910303df4331399+137649 + - class: Directory + location: keep:c3884ba793bf5771ba7267246f747893+138479 + - class: Directory + location: keep:e96460313d37f2e7d0aa647d3b88f6f8+138479 + - class: Directory + location: keep:fa72acb0219d6833f90beb1e3874e485+138481 + - class: Directory + location: keep:01e5edc00295926a91351e8069cb144f+138477 + - class: Directory + location: keep:bf076ef6270cf0f2cf3c6aaa79893cc8+138048 + - class: Directory + location: keep:66d688c08060cd9750fb6e46cce3fa39+138479 + - class: Directory + location: keep:9c78e31f255406bee37d8960ce79c185+138471 + - class: Directory + location: keep:100591a332d9f256842f086fa92177dc+138735 + - class: Directory + location: keep:ef10e9c7591c5466d5791666f830bf06+138725 + - class: Directory + location: keep:8b42e879ae88a25fb4ca5888abea9c38+138687 + - class: Directory + location: keep:ffa0d2888d14b2940562801bc163d4b2+138667 + - class: Directory + location: keep:98854a1e4db58922e5e712507217e2b5+138483 + - class: Directory + location: keep:f72b41e5d5cb7dbb4171eac06f86f6a1+138479 + - class: Directory + location: keep:381b930fc4059b819bda2c414927deaf+138477 + - class: Directory + location: keep:558b093245095d1029f4cbe1f2726e99+138056 + - class: Directory + location: keep:0f937cc7f4e6f8b47d80e28f0a99bc8a+138479 + - class: Directory + location: 
keep:95aba9cd1cce2de6fbc6c5851c81387d+138479 + - class: Directory + location: keep:94328ab7b661267810f9f0197eae70da+138479 + - class: Directory + location: keep:666124ae8a9a6734bba5de48490748c0+138479 + - class: Directory + location: keep:98359aa9baa931eea204298f7b26563c+138479 + - class: Directory + location: keep:c061711a7b7e7d8acddfde36e785cc77+138479 + - class: Directory + location: keep:ac21ed8a0e6af91debc39ddee1197787+138503 + - class: Directory + location: keep:41ab49b22d2f4ed90fa4f446ef7fbb8d+138357 + - class: Directory + location: keep:0e6d1ff3738abd240efaca9079f62f46+138851 + - class: Directory + location: keep:ed82b93d0ca76ffd666457399c2462cd+138101 + - class: Directory + location: keep:7efb683d9556c87c69fb1cbbaa290820+137674 + - class: Directory + location: keep:377b8665f04091581e560614ba20ca46+138515 + - class: Directory + location: keep:da7207cc977b12f60372b742785eec62+138491 + - class: Directory + location: keep:54403688627695480b373eb354dc1c7f+138479 + - class: Directory + location: keep:dff90a9f9feaa5ed1e7c86f1e79926cc+138489 + - class: Directory + location: keep:f82c35228c761a1e0f3ec9f5687efddd+137093 + - class: Directory + location: keep:661cd8f54deabacb49ec78db93ed4578+138479 + - class: Directory + location: keep:b326e6b7773354041b03dfe4564ffd0b+137095 + - class: Directory + location: keep:853632bf7879231f9ac24096e52f95e1+138479 + - class: Directory + location: keep:f190f2167bcd4fe819878efcde43e6f0+138059 + - class: Directory + location: keep:51f70f5861f61aca2e441e2c5d282150+138479 + - class: Directory + location: keep:6b4ecc7f949c824f6e2c99ccd8450238+138479 + - class: Directory + location: keep:11ca0c4ee2a4409c6f43470515c897fd+138479 + - class: Directory + location: keep:9f89f0c88e2b709af225e8ba2f8bff8d+138477 + - class: Directory + location: keep:e8f37c72e41dcc35f02d4fbe5da08a8b+138485 + - class: Directory + location: keep:1bbc537e0b7ac50e1e9ec5fe12c5bb2e+138580 + - class: Directory + location: keep:f0322067b85babcae2ecdd19d34f7597+138478 + - 
class: Directory + location: keep:47aa083e8d0a6c4c297d036165902282+138478 + - class: Directory + location: keep:9f9d01fd058a06f30955f6b9709194fb+138531 + - class: Directory + location: keep:f382e81786847d7395684e858786fc62+138479 + - class: Directory + location: keep:42b9f228c66a1bb26f37f6751f4f125f+138479 + - class: Directory + location: keep:abfec28b755a4826f3e58703dbbd1ac9+138059 + - class: Directory + location: keep:6346d82f0746faee45f5ba194dd25d0b+138479 + - class: Directory + location: keep:046da89ef991cc019f5092c326e2b8f8+138479 + - class: Directory + location: keep:52bdef8f26c69eb338d5d94b08cce125+138479 + - class: Directory + location: keep:4e0dbaba2b52fd93cb2c279415186a7b+138599 + - class: Directory + location: keep:fa21ef0cd50701250a927b4993df53e7+138297 + - class: Directory + location: keep:a333d969b2b84dfe7a2abb5889763c26+138675 + - class: Directory + location: keep:5fcb253d3584035246f5b4bcd60fa348+137291 + - class: Directory + location: keep:8a73aff41e5b603adddcf1eefe2e15e9+137209 + - class: Directory + location: keep:e77aefd6c8f51abcf404e482a9b06b0d+134402 + - class: Directory + location: keep:6ae72d55cf26d4afa12dab3970e0bf59+136783 + - class: Directory + location: keep:b029a9d42e085a45a2ce1840d0961eb7+137879 + - class: Directory + location: keep:41cd1f1511d617ec6e4bfb42db64be03+138187 + - class: Directory + location: keep:55ad863fef7d8dccf4db32f282f752c4+135119 + - class: Directory + location: keep:30e016d511a2443985d2b051638a419b+138047 + - class: Directory + location: keep:2711053e417e15034cae0c8aec568a45+138352 + - class: Directory + location: keep:020cddfa8082c5476c011aaa189cf518+138479 + - class: Directory + location: keep:5bb2df76b0869bea8833784a934f7021+138479 + - class: Directory + location: keep:112983eafc025aaeb54d81cf01ce3a4e+138479 + - class: Directory + location: keep:6e1bf98f088ced1ef8b60f562374325b+138479 + - class: Directory + location: keep:153d189544f36dd39610087a782589ca+138479 + - class: Directory + location: 
keep:ff71aa46f8f4134f1556d4b19d0b9307+138058 + - class: Directory + location: keep:e996083281665242208b0773b0d6287c+138058 + - class: Directory + location: keep:3f2d1ab2f1fa98341ddab5b3cc53ec2e+138479 + - class: Directory + location: keep:abbcfbc8fc06988a2b0d94b0c443e459+138479 + - class: Directory + location: keep:7e6f8a9c9244ea7c6e3341d1b5aa6b32+138479 + - class: Directory + location: keep:e08ebbc428bc0604e07f72b21410ce58+138479 + - class: Directory + location: keep:e2367de25b3dffcb1ac1aa7373b7a283+138479 + - class: Directory + location: keep:f4318c61400198c3d44895e184e79b4c+138479 + - class: Directory + location: keep:5a0d263b7469871268e31919899e6103+138445 + - class: Directory + location: keep:0c5a32dc706c195619b3f84bac5d3978+136353 + - class: Directory + location: keep:3843ae092703f6696cb5f42f464f4583+134452 + - class: Directory + location: keep:745d1dec2cfeb0f28f3eaf9dedd6a962+136783 + - class: Directory + location: keep:570f09f532cbd74146bd8ba3cc363263+137749 + - class: Directory + location: keep:ec659571f3a78e63e457797fc58f1828+137677 + - class: Directory + location: keep:60da5cf26a882d1b4e024326cd7d893b+130004 + - class: Directory + location: keep:13f6862e9770f63bd44b6ef539541dc3+138079 + - class: Directory + location: keep:83fb84b000e58c16fe17adef5de277b1+138079 + - class: Directory + location: keep:2865b8e5e49decb8b65b42ae060bcc96+138079 + - class: Directory + location: keep:fc49628c14a44abf3e54c4a956aa5888+136845 + - class: Directory + location: keep:6c52a3c5f4bc152851ad9cdcacaabf23+138089 + - class: Directory + location: keep:c08e22c6705b07111205c0edc2c3872d+136586 + - class: Directory + location: keep:c577425ff9f3a0d886f3426065a72ca4+137691 + - class: Directory + location: keep:ddf235932a0e2176593aac5a4ac42861+136337 + - class: Directory + location: keep:aa28a7e5d5657abbd0134e66cf6a89a5+133731 + - class: Directory + location: keep:49b91c25d606fd2fa451d2b7c92193a8+137879 + - class: Directory + location: keep:838fd46abf759e68bdc17f29d1680752+138063 + - 
class: Directory + location: keep:a1391969d1b8e92d8c98ed4ad8fad8ed+136665 + - class: Directory + location: keep:3dfbeda6c564f9f275f74c694022b52d+135167 + - class: Directory + location: keep:94de36359231b01ef2e277083bfe5287+137677 + - class: Directory + location: keep:2370e1db54c8e0e4b2d314c4a7984304+137845 + - class: Directory + location: keep:aa76e46ce3409c0cccb96fe9f7ad960a+138170 + - class: Directory + location: keep:5bb1f2fcb9961e0c0aa7159cabdd2fd3+137678 + - class: Directory + location: keep:807a04e5c2d91d5f454323b6fc9e035d+137679 + - class: Directory + location: keep:368d3f71ad4e0822a3bac8abe67ed536+133665 + - class: Directory + location: keep:0d5b719030ae9bffbfbcd2c0855c66c7+128362 + - class: Directory + location: keep:2c08aca1b2c0537e0d153a21dc0561f8+136859 + - class: Directory + location: keep:9acd969c8b08cedce1fd8d769d214b75+137559 + - class: Directory + location: keep:e27d19c6a505cd01eea3f0a13ba46a83+137977 + - class: Directory + location: keep:127cc72a3bd0953bb19e05b83f5190d0+26161 + - class: Directory + location: keep:fd615aa5b1fcf7f2695c4ec8f5b9351a+55569 + - class: Directory + location: keep:dfed457efee3414f1c1f7536fe83eed3+109552 + - class: Directory + location: keep:14388f62c868254149d5972c7d80d28d+97366 + - class: Directory + location: keep:5e99d1ef6b90001e01450ffe093e9493+135278 + - class: Directory + location: keep:153533e237092985535d7f9b0a4a354e+135278 + - class: Directory + location: keep:1a0d1ede9fcabaabce592c86bec88db0+135278 + - class: Directory + location: keep:281ec2620a88b7274b44ece0ca96b543+135278 + - class: Directory + location: keep:9d2dfc3510b82e5249045e71862a44d9+135278 + - class: Directory + location: keep:a3191cb63a48307e9cd97cc6de42d83c+135194 + - class: Directory + location: keep:d80d9356d64396465ca61585b4c3031b+135419 + - class: Directory + location: keep:881e67efac709867dafc480116c2edf1+138479 + - class: Directory + location: keep:5ffe1b2bcdb75190b228d8e7117baaa0+138479 + - class: Directory + location: 
keep:6a671736ca4bf130e3ab507b702024bc+136672 + - class: Directory + location: keep:504c77a1298ff2649af35527dec8b467+138058 + - class: Directory + location: keep:e19f2c238e685f62f52c58eb4c44d703+134867 + - class: Directory + location: keep:afad6e6fd6933d8919a4a39712e80afe+67509 + - class: Directory + location: keep:09771439ce1431d168b15bb36cc8cacb+136998 + - class: Directory + location: keep:cb3b59ec7468d27ef643380568f945e4+138679 + - class: Directory + location: keep:b3ec1453c963d0b5c36134bd3e7c97dc+138679 + - class: Directory + location: keep:52d413d47ebc5abaeda098254ce6d517+138629 + - class: Directory + location: keep:43e6437a5dc4d833ed2e167b42b9430a+138059 + - class: Directory + location: keep:d081f62909038dd25c499972547ced53+138479 + - class: Directory + location: keep:bcd3a181bcb601c1e57f7cb7eeb4d270+137975 + - class: Directory + location: keep:9882f01b1cc6172959a3efab49d89397+78041 diff --git a/cwl/lightning/yml/fasta2numpy-wf-100test.yml b/cwl/lightning/yml/fasta2numpy-wf-100test.yml new file mode 100644 index 0000000000..0981448a40 --- /dev/null +++ b/cwl/lightning/yml/fasta2numpy-wf-100test.yml @@ -0,0 +1,63 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdir: + class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 1 +matchgenome: "" +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesnofamilydir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 +phenotypesdir: + class: Directory + location: keep:8508667def6057f0bbf0ab4f751d8b05+205 +trainingsetsize: 0.8 +randomseed: 0 +pcacomponents: 10 +readmeinfo: ["2022-08-23", "called using VCPA1.1 pipeline", "https://workbench.2xpu4.arvadosapi.com/projects/2xpu4-j7d0g-50i3pwf3a16ubbf"] +chrs: + - "chr1" + - "chr2" + - "chr3" + - "chr4" + - "chr5" + - "chr6" + - "chr7" + - "chr8" + - "chr9" + - "chr10" + - "chr11" + - "chr12" + - "chr13" + - "chr14" + - "chr15" + - "chr16" + - "chr17" + - "chr18" + - "chr19" + - "chr20" + - "chr21" + - "chr22" + - "chrX" + - "chrY" +snpeffdatadir: + class: Directory + location: keep:66c966928931de252274772c76f73025+52054 +genomeversion: "GRCh38.86" +dbsnp: + class: File + location: keep:a088b297d614e4c63cbb23f8ad404438+12313/00-All.vcf.gz_renamed.bcf +gnomaddir: + class: Directory + location: keep:c6a8fc877e85d73ac5b165e2d7367e26+675135 +fastadirs: + - class: Directory + location: keep:7f3ba4ea8d7504ce040da01507c71f6f+138479 diff --git a/cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml b/cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml new file mode 100644 index 0000000000..a68bb4f1c6 --- /dev/null +++ b/cwl/lightning/yml/imputation-gvcf2fasta-fasta2numpy-wf-100test.yml @@ -0,0 +1,337 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +refdir: + class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 +batchsize: 1 +matchgenome: "" +threads: 10 +mergeoutput: "false" +expandregions: 0 +gqcutoff: 20 +genomebed: + class: File + location: keep:67b4d7e55e61cef7343f3928af97e197+54/hg38.bed +ref: + class: File + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761/hg38.fa.gz +chrs: ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22", "chrX"] +refsdir: + class: Directory + location: keep:3a1673a2230877bfaf92b50f7376529c+10590 +mapsdir: + class: Directory + location: keep:10b1baade985c576a97dfc37d12b953b+1096 +panelnocallbed: + class: File + location: keep:f0b387cc686f227748e6d49033c3f2f0+101/HG001_GRCh38_1_22_v4.2.1_benchmark_nocall.bed +panelcallbed: + class: File + location: keep:ea4364c48def931b8cdaf07913a3657b+94/HG001_GRCh38_1_22_v4.2.1_benchmark.bed +sampleids: + - "A-IIAA-IA000604-BL-NCR-14AD67907" + - "A-IIAA-IA000369-BL-NCR-14AD67573" + - "A-IIAA-IA000196-BL-NCR-14AD66938" + - "A-IIAA-IA000194-BL-NCR-14AD66933" + - "A-IIAA-IA000009-BL-NCR-14AD67102" + - "A-WCAP-WC001736-BL-COL-62260BL1" + - "A-WCAP-WC001725-BL-COL-62376BL1" + - "A-WCAP-WC001708-BL-COL-50951BL1" + - "A-WCAP-WC001695-BL-COL-46967BL1" + - "A-WCAP-WC001710-BL-COL-40530BL1" + - "A-WCAP-WC001720-BL-COL-62328BL1" + - "A-WCAP-WC001704-BL-COL-48409BL1" + - "A-WCAP-WC001702-BL-COL-45946BL1" + - "A-WCAP-WC001697-BL-COL-46951BL1" + - "A-WCAP-WC001699-BL-COL-55120BL1" + - "A-WCAP-WC001700-BL-COL-57684BL1" + - "A-WCAP-WC001696-BL-COL-50506BL1" + - "A-WCAP-WC001693-BL-COL-48336BL1" + - "A-WCAP-WC001683-BL-COL-49188BL1" + - "A-WCAP-WC001687-BL-COL-48252BL1" + - "A-WCAP-WC001694-BL-COL-45207BL1" + - "A-WCAP-WC001686-BL-COL-50549BL1" + - "A-WCAP-WC001691-BL-COL-46031BL1" + - 
"A-WCAP-WC001688-BL-COL-50977BL1" + - "A-WCAP-WC001684-BL-COL-47006BL1" + - "A-WCAP-WC001681-BL-COL-57657BL1" + - "A-WCAP-WC001682-BL-COL-47560BL1" + - "A-WCAP-WC001679-BL-COL-49250BL1" + - "A-WCAP-WC001672-BL-COL-45187BL1" + - "A-WCAP-WC001673-BL-COL-48284BL1" + - "A-WCAP-WC001675-BL-COL-48314BL1" + - "A-WCAP-WC001677-BL-COL-46990BL1" + - "A-WCAP-WC001671-BL-COL-50527BL1" + - "A-WCAP-WC001669-BL-COL-57703BL1" + - "A-WCAP-WC001667-BL-COL-46970BL1" + - "A-WCAP-WC001670-BL-COL-50445BL1" + - "A-WCAP-WC001654-BL-COL-46029BL1" + - "A-WCAP-WC001664-BL-COL-39293BL1" + - "A-WCAP-WC001665-BL-COL-47583BL1" + - "A-WCAP-WC001653-BL-COL-48362BL1" + - "A-WCAP-WC001657-BL-COL-65820BL1" + - "A-WCAP-WC001658-BL-COL-50384BL1" + - "A-WCAP-WC001666-BL-COL-48390BL1" + - "A-WCAP-WC001656-BL-COL-57746BL1" + - "A-WCAP-WC001638-BL-COL-47008BL1" + - "A-WCAP-WC001639-BL-COL-41818BL1" + - "A-WCAP-WC001646-BL-COL-45962BL1" + - "A-WCAP-WC001652-BL-COL-47594BL1" + - "A-WCAP-WC001640-BL-COL-47537BL1" + - "A-WCAP-WC001629-BL-COL-69602BL1" + - "A-WCAP-WC001641-BL-COL-46986BL1" + - "A-WCAP-WC001645-BL-COL-41786BL1" + - "A-WCAP-WC001636-BL-COL-47553BL1" + - "A-WCAP-WC001634-BL-COL-50462BL1" + - "A-WCAP-WC001623-BL-COL-56498BL1" + - "A-WCAP-WC001627-BL-COL-56607BL1" + - "A-WCAP-WC001626-BL-COL-48292BL1" + - "A-WCAP-WC001621-BL-COL-48345BL1" + - "A-WCAP-WC001618-BL-COL-50400BL1" + - "A-WCAP-WC001622-BL-COL-50921BL1" + - "A-WCAP-WC001616-BL-COL-56626BL1" + - "A-WCAP-WC001617-BL-COL-45961BL1" + - "A-WCAP-WC001612-BL-COL-49158BL1" + - "A-WCAP-WC001608-BL-COL-48342BL1" + - "A-WCAP-WC001611-BL-COL-48295BL1" + - "A-WCAP-WC001605-BL-COL-45954BL1" + - "A-WCAP-WC001594-BL-COL-40429BL1" + - "A-WCAP-WC001606-BL-COL-48422BL1" + - "A-WCAP-WC001595-BL-COL-45200BL1" + - "A-WCAP-WC001602-BL-COL-45226BL1" + - "A-WCAP-WC001604-BL-COL-56480BL1" + - "A-WCAP-WC001598-BL-COL-49123BL1" + - "A-WCAP-WC001603-BL-COL-56489BL1" + - "A-WCAP-WC001587-BL-COL-45975BL1" + - "A-WCAP-WC001593-BL-COL-45249BL1" + - 
"A-WCAP-WC001589-BL-COL-55018BL1" + - "A-WCAP-WC001577-BL-COL-48318BL1" + - "A-WCAP-WC001586-BL-COL-45943BL1" + - "A-WCAP-WC001585-BL-COL-45991BL1" + - "A-WCAP-WC001584-BL-COL-45976BL1" + - "A-WCAP-WC001581-BL-COL-56483BL1" + - "A-WCAP-WC001582-BL-COL-47610BL1" + - "A-WCAP-WC001572-BL-COL-45937BL1" + - "A-WCAP-WC001574-BL-COL-56642BL1" + - "A-WCAP-WC001567-BL-COL-45235BL1" + - "A-WCAP-WC001573-BL-COL-46034BL1" + - "A-WCAP-WC001568-BL-COL-50455BL1" + - "A-WCAP-WC001566-BL-COL-56566BL1" + - "A-WCAP-WC001559-BL-COL-49283BL1" + - "A-WCAP-WC001556-BL-COL-45259BL1" + - "A-WCAP-WC001564-BL-COL-39158BL1" + - "A-WCAP-WC001552-BL-COL-55020BL1" + - "A-WCAP-WC001563-BL-COL-41119BL1" + - "A-WCAP-WC001558-BL-COL-48277BL1" + - "A-WCAP-WC001545-BL-COL-45981BL1" + - "A-WCAP-WC001547-BL-COL-57785BL1" + - "A-WCAP-WC001543-BL-COL-45240BL1" + - "A-WCAP-WC001542-BL-COL-40920BL1" + - "A-WCAP-WC001541-BL-COL-41762BL1" + - "A-WCAP-WC001544-BL-COL-49161BL1" +splitvcfdirs: + - class: Directory + location: keep:47e5a217867e6a69efe10378541b38e7+7816 + - class: Directory + location: keep:00d1dac7ab3769e1b600129643b3f7bc+8361 + - class: Directory + location: keep:27e8ae061969d12e6137aa9b2e9b585c+6128 + - class: Directory + location: keep:24eb9ef3c45fae2569077d429f121bbd+8778 + - class: Directory + location: keep:00a065a8a5e71acfd083172de3a86d4b+6930 + - class: Directory + location: keep:c751a4f4778156180605ebc04bad6cc1+5894 + - class: Directory + location: keep:7f94ff84914a9d0d873f5313e7124c55+5642 + - class: Directory + location: keep:987b13f6ad4974b796cc2ec8f270d19d+6356 + - class: Directory + location: keep:2284173a8cbcf3e950a41d385069d327+7622 + - class: Directory + location: keep:db084c1516d23a4c6c746105d58a08f3+6356 + - class: Directory + location: keep:a823e23a5f0822a981fa720a2bcb2287+6062 + - class: Directory + location: keep:c7bd505077ecd399b59176f8d5bc34aa+5808 + - class: Directory + location: keep:a590e7f19b8f19f1babdd8b7795e1c20+5681 + - class: Directory + location: 
keep:1e1232a1a1df7b39605a4630464ef864+5517 + - class: Directory + location: keep:f5c57ac585ab060ca4f9af439acd3e78+5682 + - class: Directory + location: keep:714619e20a6cb2220fae47d5519d2b9f+5093 + - class: Directory + location: keep:d572b35e03d4a2545e4c917506738917+5383 + - class: Directory + location: keep:cca9ddebe18cdde474f9b9ceb33c0247+5768 + - class: Directory + location: keep:55a3599a3b6adac75c19772f2fd0e080+5683 + - class: Directory + location: keep:f4725fe4ae3032ff1f6852701aef182b+5176 + - class: Directory + location: keep:a046387c19b4b4e92d3728cfb5c2239d+5468 + - class: Directory + location: keep:ca6aabbfe01db391a27c755f413f7e24+5010 + - class: Directory + location: keep:f880cb3c4fb2dee626a7afdce73f6b35+5051 + - class: Directory + location: keep:213d15a3e1e2642b593449943d54f940+5851 + - class: Directory + location: keep:e5df0c86fe692bdc234acdea89c09735+5512 + - class: Directory + location: keep:3dcd72a55d24501eb4eeab04e735bc9e+6058 + - class: Directory + location: keep:a56ad75332c6e504237d20f17006b306+5558 + - class: Directory + location: keep:15fae2b96e4f4c28e9473755ade2beb0+5515 + - class: Directory + location: keep:7be46744f59209dd25710bfa8bfb59d0+6527 + - class: Directory + location: keep:bd0a1e4399598a231a8e78c475e94e22+5769 + - class: Directory + location: keep:209ab994cae2c50c0f0f409cecfe0af4+5343 + - class: Directory + location: keep:507185affe0707d6eb0269008717be79+4756 + - class: Directory + location: keep:57af40c026feaf3da5ab7e095caeeae1+5725 + - class: Directory + location: keep:3f515cba6b180bb7aa151ab05ce43270+5936 + - class: Directory + location: keep:15350af160d548a437e45a1ca0432363+7786 + - class: Directory + location: keep:dd76f2ee8fc0579b64e685d30e5d9922+5427 + - class: Directory + location: keep:fde51804f15e0fea5a6bb7be37e1d262+6734 + - class: Directory + location: keep:141fb2f192c4e1efdb6e373543022ab7+6568 + - class: Directory + location: keep:2a96bea877d4a9cf25753c1298f34e58+6612 + - class: Directory + location: 
keep:6edb216921b036a20cdd32583f2970b6+6022 + - class: Directory + location: keep:f26422b6b05b8bb1e9f486e5c09051b2+5640 + - class: Directory + location: keep:4ed2571eace3eb2963867ca835862646+6061 + - class: Directory + location: keep:a5bf8756702a8f79723d3b134a8c6cff+5725 + - class: Directory + location: keep:05fe61865950248bc6ed9f732426f42a+5385 + - class: Directory + location: keep:e48587348ce4b238ad6594f3a862fca7+7832 + - class: Directory + location: keep:802ba4f4f4a04e53f9e3120f5a871fd9+6902 + - class: Directory + location: keep:22b66f6b397d2e051740d0b3b896c13c+5892 + - class: Directory + location: keep:a6666076ab9bf6963e52d82206b17581+5429 + - class: Directory + location: keep:ed99ae4b5448d5e998444cdc2d288c4f+5978 + - class: Directory + location: keep:31e562eecd2259dc0404f83f138eb13a+6814 + - class: Directory + location: keep:4b247a882829c85824ca49309e51f8b3+5470 + - class: Directory + location: keep:4a3a45a029be557ceb627050b278404c+5097 + - class: Directory + location: keep:96576082494eacc33d34891643247e16+5639 + - class: Directory + location: keep:7849fd811c58ff9797956ca88885c072+5134 + - class: Directory + location: keep:8281291d46712c4dee2929be01a8459d+5935 + - class: Directory + location: keep:afdc5af01594e0e0372ab17287575db3+5427 + - class: Directory + location: keep:aa783333788f5dd554055074ed4cb5ab+5384 + - class: Directory + location: keep:21c8d76ef6ee9950cd2bb641b226a57b+5934 + - class: Directory + location: keep:57a4338099666f13ab7cd05bad7c67c8+5892 + - class: Directory + location: keep:04a1b83e91062b8c43eb3d470aaa6c64+5051 + - class: Directory + location: keep:ad31d97aa3d355a666fe07da625f3994+6482 + - class: Directory + location: keep:98acdba4fb52ac698eaad7449660227e+5517 + - class: Directory + location: keep:03b7cd1daf28b6dcc913a45342f37c96+6482 + - class: Directory + location: keep:60c42c1bad792d1d1ebc4c40420e8032+7030 + - class: Directory + location: keep:3e7ef8e480273a67e223db2842d38e43+5513 + - class: Directory + location: 
keep:b8ce59383ab761c76b35c91773409bf8+6692 + - class: Directory + location: keep:2c7a882d3f13a0299baf866dc83872d7+7029 + - class: Directory + location: keep:8740baf8f9730eff6d40a918a4c20f90+5384 + - class: Directory + location: keep:8e63209016939215a48def1b350dee0a+6650 + - class: Directory + location: keep:cee36503dcd257a70630396eab59e6c1+6481 + - class: Directory + location: keep:d17a17d9fb4d05cbcadde06b99fae806+5430 + - class: Directory + location: keep:c7f9b800e363290047d61904cc872c3e+5769 + - class: Directory + location: keep:2dee32c1ab8b1fcc264458ae2609a18c+4887 + - class: Directory + location: keep:9e18a67bc403b4d51ee4f556c597b689+5932 + - class: Directory + location: keep:7bf3c8ef6a8ed7b4563569e1e4b85154+5051 + - class: Directory + location: keep:cf3ca53fe3fe7955cd8993c9f2bdd24d+5682 + - class: Directory + location: keep:8716d3eff15d14b0a072e481698fc715+6485 + - class: Directory + location: keep:14b53d263217e13caf5755c66b8f9232+4884 + - class: Directory + location: keep:df79fb7025f8706f20ed678e1916fd15+5424 + - class: Directory + location: keep:e90c5745c169fc9f945463fcd065cdfd+4842 + - class: Directory + location: keep:2e65619e3e557ae435b8b24cad86ce0e+6440 + - class: Directory + location: keep:a67a94826b54eb78a4c3e582233579f8+6482 + - class: Directory + location: keep:c184cd5457f7026ba8112492e3741036+6397 + - class: Directory + location: keep:d5c5c607fd49300d94ecd7de39592eff+5177 + - class: Directory + location: keep:3c7bf8b576bce2567590f90362ef0edf+5009 + - class: Directory + location: keep:f83f53f0698ad52a1cb9b265a451eba8+5051 + - class: Directory + location: keep:7680642fc1c8741d6657ca8b30675661+5013 + - class: Directory + location: keep:55e6c7dc3edefb6625ee47ddfbe86f10+6648 + - class: Directory + location: keep:0abaab02ff171c7a3d283ec54c845498+5515 + - class: Directory + location: keep:dc949aef3a7959dc5259aa9e5caff0ac+6525 + - class: Directory + location: keep:52f5abd360f99207bc7266f8f3b4e2ba+5512 + - class: Directory + location: 
keep:3c0cb444429a6cf0be2ffc6e0359a345+6524 + - class: Directory + location: keep:c6d33856d6620ed3b7dfcfaa9e4fa987+5343 + - class: Directory + location: keep:84dc794f57a9fbf51f92a9add486702b+5341 + - class: Directory + location: keep:f1b7173ab79d41035044f8ffa7ea5595+7956 + - class: Directory + location: keep:7362951a09e4177e83af2ce779700ab8+6188 + - class: Directory + location: keep:5f45b74d16fc04376ff3a16d30518ebb+5935 + - class: Directory + location: keep:0a35ed7284d0851c7a2698026837c604+6900 + - class: Directory + location: keep:0f3da67b2ad0df2886e7fe1e1c1b5338+6777 + - class: Directory + location: keep:7ad01e84179c627d92b067ebe0c1e7bb+5893 diff --git a/cwl/lightning/yml/lightning-export-numpy-merged.yml b/cwl/lightning/yml/lightning-export-numpy-merged.yml new file mode 100644 index 0000000000..471a89fa4a --- /dev/null +++ b/cwl/lightning/yml/lightning-export-numpy-merged.yml @@ -0,0 +1,8 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +lib: + class: File + location: keep:921ce01c909acc0068b820ca48d3fcde+1542/library.gob.gz +chunks: 1 diff --git a/cwl/lightning/yml/lightning-import-ref37.yml b/cwl/lightning/yml/lightning-import-ref37.yml new file mode 100644 index 0000000000..b092cde955 --- /dev/null +++ b/cwl/lightning/yml/lightning-import-ref37.yml @@ -0,0 +1,11 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +saveincomplete: "true" +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +fastadirs: + - class: Directory + location: keep:3cf4b6ed2bf8cd3abc27cb5a79641a86+755 diff --git a/cwl/lightning/yml/lightning-import-ref38.yml b/cwl/lightning/yml/lightning-import-ref38.yml new file mode 100644 index 0000000000..fbb4922ea7 --- /dev/null +++ b/cwl/lightning/yml/lightning-import-ref38.yml @@ -0,0 +1,11 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +saveincomplete: "true" +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +fastadir: + class: Directory + location: keep:ee5b90cf2d5f3573e6d455ab56e15cdf+761 diff --git a/cwl/lightning/yml/lightning-import-testdata.yml b/cwl/lightning/yml/lightning-import-testdata.yml new file mode 100644 index 0000000000..3f04854054 --- /dev/null +++ b/cwl/lightning/yml/lightning-import-testdata.yml @@ -0,0 +1,15 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +saveincomplete: "false" +tagset: + class: File + location: keep:c37923fd267415556962d5c535e9b075+110/tagset.fa.gz +fastadirs: + - class: Directory + location: keep:2xpu4-4zz18-6lg2a1uoanyiyzv + - class: Directory + location: keep:2xpu4-4zz18-bwlg17dkckptebn + - class: Directory + location: keep:2xpu4-4zz18-31tn6gfh4rn6rqb diff --git a/cwl/lightning/yml/lightning-merge-testdata_ref38.yml b/cwl/lightning/yml/lightning-merge-testdata_ref38.yml new file mode 100644 index 0000000000..6d4e4169a1 --- /dev/null +++ b/cwl/lightning/yml/lightning-merge-testdata_ref38.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +lib1: + class: File + location: keep:9ecc22a5a7820128bb1fbf1b457848a9+1525/library.gob.gz +lib2: + class: File + location: keep:2a134f80896100f67fbb0a1d9dfb3ee8+1107/library.gob.gz diff --git a/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml new file mode 100644 index 0000000000..e7d9d35642 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-0831_0315.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "" +libdir: + class: Directory + location: keep:37bfc41e8ce21b8c1719cf4790f454c6+656483 +regions: null +threads: 10 +mergeoutput: "false" +expandregions: 0 diff --git a/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml new file mode 100644 index 0000000000..e5affaa9f3 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-anno2vcf-wf-test.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "" +libdir: + class: Directory + location: keep:0eb748be8b4ce392eb405b71199a2ef1+54032 +regions: null +threads: 10 +mergeoutput: "false" +expandregions: 0 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml new file mode 100644 index 0000000000..36ceedccc5 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-ADC.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "A-ADC" +libdir: + class: Directory + location: keep:5b615d7692137b25e0411801e7c37b0c+656435 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml new file mode 100644 index 0000000000..bb65d52fa9 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-CUHS.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "A-CUHS" +libdir: + class: Directory + location: keep:5b615d7692137b25e0411801e7c37b0c+656435 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml new file mode 100644 index 0000000000..1f95cbc27d --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-A-IIAA.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "A-IIAA" +libdir: + class: Directory + location: keep:5b615d7692137b25e0411801e7c37b0c+656435 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml new file mode 100644 index 0000000000..d4f795d482 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-ADNI.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "ADNI" +libdir: + class: Directory + location: keep:5b615d7692137b25e0411801e7c37b0c+656435 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml new file mode 100644 index 0000000000..7b91088f7e --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-full.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "" +libdir: + class: Directory + location: keep:5b615d7692137b25e0411801e7c37b0c+656435 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml b/cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml new file mode 100644 index 0000000000..d991c5b1a5 --- /dev/null +++ b/cwl/lightning/yml/lightning-slice-numpy-onehot-pvalue1e-8-full.yml @@ -0,0 +1,14 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +matchgenome: "" +libdir: + class: Directory + location: keep:25d3137ac3d35c5600a1b81ad8b64ec3+656017 +threads: 10 +mergeoutput: "false" +expandregions: 0 +phenotypesdir: + class: Directory + location: keep:9b76b33c948bba77f2bf58f915cff9f4+120 diff --git a/cwl/lightning/yml/lightning-tiling-stats-ref37.yml b/cwl/lightning/yml/lightning-tiling-stats-ref37.yml new file mode 100644 index 0000000000..71171d5880 --- /dev/null +++ b/cwl/lightning/yml/lightning-tiling-stats-ref37.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +libdir: + class: Directory + location: keep:3ae12ac1758199f567ff4ce0e41d72cc+1107 diff --git a/cwl/preprocess/cgivar/bedtools-intersect.cwl b/cwl/preprocess/cgivar/bedtools-intersect.cwl new file mode 100644 index 0000000000..b3b0bbed1c --- /dev/null +++ b/cwl/preprocess/cgivar/bedtools-intersect.cwl @@ -0,0 +1,45 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `bedtools intersect` on a VCF against a BED file, pipes the result
# through `bgzip -c` into "<input basename>.gz" in the working directory and
# then indexes that file with `tabix -p vcf`.
cwlVersion: v1.0
class: CommandLineTool
label: Intersect VCF with BED
requirements:
  # Required because the arguments below inject unquoted shell operators
  # ("|", ">", "&&") into the command line.
  ShellCommandRequirement: {}
inputs:
  vcf:
    type: File
    label: Input VCF
  bed:
    type: File
    label: Input BED
outputs:
  vcfgz:
    type: File
    label: Output VCF with records inside the BED region
    outputBinding:
      # Matches the "<input basename>.gz" file written by the redirect below,
      # provided the input basename ends in ".vcf".
      glob: "*.vcf.gz"
    # The .tbi index is the file produced by the trailing tabix call.
    secondaryFiles: [.tbi]
baseCommand: [bedtools, intersect]
# The order of these entries is the order of the words in the shell pipeline.
arguments:
  - "-header"
  - prefix: "-a"
    valueFrom: $(inputs.vcf)
  - prefix: "-b"
    valueFrom: $(inputs.bed)
  # NOTE(review): "-f 1" presumably requires each VCF record to overlap the
  # BED region completely -- confirm against the bedtools documentation.
  - prefix: "-f"
    valueFrom: "1"
  - shellQuote: false
    valueFrom: "|"
  - "bgzip"
  - "-c"
  - shellQuote: false
    valueFrom: ">"
  - $(inputs.vcf.basename).gz
  # tabix only runs if the preceding pipeline exits successfully.
  - shellQuote: false
    valueFrom: "&&"
  - "tabix"
  - prefix: "-p"
    valueFrom: "vcf"
  - $(inputs.vcf.basename).gz
#!/bin/sh
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Convert a CGIVAR (masterVar) file to VCF on standard output.
#
# Usage: cgatools-mkvcf.sh REFERENCE CGIVAR
#   REFERENCE  value passed to `cgatools mkvcf --reference`
#   CGIVAR     value passed to `cgatools mkvcf --master-var`
#
# The interpreter line above has to stay on the very first line of the file:
# cgatools-mkvcf.cwl runs this script directly as the command, so the kernel
# must be able to find the "#!" marker at byte 0.

REFERENCE=$1
CGIVAR=$2

# Arguments are quoted so paths containing spaces or glob characters are
# passed through intact.
# "|| true" is kept from the original: the exit status of cgatools is
# deliberately ignored, so this script always exits 0.
cgatools mkvcf --beta --reference "$REFERENCE" --include-no-calls --field-names GT,GQ,DP,AD --source-names masterVar --master-var "$CGIVAR" || true
#!/usr/bin/env python
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

"""Remove stray periods from the END= entry of the INFO field of a VCF.

Usage: fix_vcf.py INPUT.vcf

Every line of INPUT.vcf is written to standard output with surrounding
whitespace stripped. Header lines are passed through untouched; on data
lines that contain 'END=', every '.' inside the END entry of the INFO
column is removed.

The interpreter line must remain the first line of the file because
fix_vcf.cwl runs this script directly as the command.
"""

import sys

# Zero-based column indexes of a VCF data line:
# CHROM 0, POS 1, REF 3, QUAL 5, INFO 7, FORMAT 8, sample 9
INFO_INDEX = 7


def is_header(line):
    """Return True if the line is a VCF header line (starts with '#')."""
    return line.startswith('#')


def has_END(line):
    """Return True if the text 'END=' occurs anywhere in the line."""
    return 'END=' in line


def fix_END(line):
    """Return the line with every '.' removed from its INFO END entry.

    Only INFO entries whose key is exactly 'END' are modified. A line with
    too few tab-separated columns to have an INFO field is returned
    unchanged instead of raising IndexError.
    """
    all_fields = line.split('\t')
    if len(all_fields) <= INFO_INDEX:
        return line

    INFO_fields = all_fields[INFO_INDEX].split(';')
    for i, INFO_field in enumerate(INFO_fields):
        if INFO_field.split('=')[0] == 'END':
            INFO_fields[i] = INFO_field.replace('.', '')

    all_fields[INFO_INDEX] = ';'.join(INFO_fields)
    return '\t'.join(all_fields)


if __name__ == '__main__':
    vcf = sys.argv[1]
    with open(vcf) as g:
        for line in g:
            # The header check comes first so header text that happens to
            # contain 'END=' is never rewritten.
            if not is_header(line) and has_END(line):
                line = fix_END(line)
            # Single-argument print() behaves the same on Python 2 and 3.
            print(line.strip())
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Scans the top level of the input directory and returns every entry whose
# extension is ".bz2", together with a parallel list of sample names.
cwlVersion: v1.0
class: ExpressionTool
label: Create list of CGIVARs to process
inputs:
  dir:
    type: Directory
    label: Input directory of CGIVARs
outputs:
  # cgivars[i] and samples[i] describe the same file; both arrays are filled
  # in the same loop iteration of the expression below.
  cgivars:
    type: File[]
    label: Output CGIVARs
  samples:
    type: string[]
    label: Sample names of CGIVARs
requirements:
  InlineJavascriptRequirement: {}
# The sample name is the part of the file's basename before its first ".".
# Only inputs.dir.listing itself is inspected; nested listings are not walked.
expression: |
  ${
    var cgivars = [];
    var samples = [];
    for (var i = 0; i < inputs.dir.listing.length; i++) {
      var file = inputs.dir.listing[i];
      if (file.nameext == ".bz2") {
        cgivars.push(file);
        var sample = file.basename.split(".")[0];
        samples.push(sample);
      }
    }
    return {"cgivars": cgivars, "samples": samples};
  }
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Lists the CGIVAR files of a directory with getfiles.cwl and runs
# cgivar2vcfbed-wf.cwl once per (file, sample name) pair.
$namespaces:
  arv: "http://arvados.org/cwl#"
cwlVersion: v1.0
class: Workflow
label: Scatter to convert CGIVARs to VCFs and BEDs
requirements:
  # The scattered step below runs another workflow.
  SubworkflowFeatureRequirement: {}
  ScatterFeatureRequirement: {}
hints:
  DockerRequirement:
    dockerPull: cgivar2vcfbed
inputs:
  cgivarsdir:
    type: Directory
    label: Input directory of CGIVARs
  reference:
    type: File
    label: CRR reference used for cgatools
  # Both scripts default to files named relative to this workflow document,
  # so job files only need to supply cgivarsdir and reference.
  cgascript:
    type: File
    label: Script invoking cgatools
    default:
      class: File
      location: cgatools-mkvcf.sh
  fixscript:
    type: File
    label: Script to fix VCF
    default:
      class: File
      location: fix_vcf.py

outputs:
  # One entry per scattered run of cgivar2vcfbed-wf.
  vcfgzs:
    type: File[]
    label: Output VCFs
    outputSource: cgivar2vcfbed-wf/vcfgz
  beds:
    type: File[]
    label: Output BEDs
    outputSource: cgivar2vcfbed-wf/bed

steps:
  getfiles:
    run: getfiles.cwl
    in:
      dir: cgivarsdir
    out: [cgivars, samples]
  cgivar2vcfbed-wf:
    run: cgivar2vcfbed-wf.cwl
    # dotproduct pairs cgivars[i] with samples[i]; the remaining inputs are
    # passed unchanged to every run.
    scatter: [cgivar, sample]
    scatterMethod: dotproduct
    in:
      cgivar: getfiles/cgivars
      sample: getfiles/samples
      reference: reference
      cgascript: cgascript
      fixscript: fixscript
    out: [vcfgz, bed]
// Copyright (C) The Lightning Authors. All rights reserved.
//
// SPDX-License-Identifier: AGPL-3.0

// Record filter passed to `rtg vcffilter --javascript` (see fixchrm.cwl).
// The header uses "//" comments because "#" does not start a comment in
// JavaScript and would make the whole file fail to parse.
//
// CHROM and SAMPLES are not defined here; they are expected to be supplied
// by the caller for the record being processed.
//
// For records on chromosome 'M':
//   - a GT with no '/' or '|' separator is rewritten as "GT/GT";
//   - a GT whose first two alleles differ makes the function return false;
//   - anything else falls through and returns undefined.
// Records on other chromosomes are never modified and return undefined.
function record() {
  if (CHROM == 'M') {
    var inputGT = SAMPLES[0].GT;
    if (inputGT.indexOf('/') == -1 && inputGT.indexOf('|') == -1 ) {
      // No separator: duplicate the single allele into an unphased pair.
      SAMPLES[0].GT = inputGT + "/" + inputGT;
    } else if (inputGT.indexOf('/') != -1 && inputGT.split('/')[0] != inputGT.split('/')[1]) {
      // Unphased call with two different alleles.
      return false;
    } else if (inputGT.indexOf('|') != -1 && inputGT.split('|')[0] != inputGT.split('|')[1]) {
      // Phased call with two different alleles.
      return false;
    }
  }
}
// Copyright (C) The Lightning Authors. All rights reserved.
//
// SPDX-License-Identifier: AGPL-3.0

// Record filter for `rtg vcffilter --javascript`, for VCFs that name the
// chromosome 'chrM'. The header uses "//" comments because "#" does not
// start a comment in JavaScript and would make the whole file fail to parse.
//
// CHROM and SAMPLES are not defined here; they are expected to be supplied
// by the caller for the record being processed.
//
// For records on chromosome 'chrM':
//   - a GT with no '/' or '|' separator is rewritten as "GT/GT";
//   - a GT whose first two alleles differ makes the function return false;
//   - anything else falls through and returns undefined.
// Records on other chromosomes are never modified and return undefined.
function record() {
  if (CHROM == 'chrM') {
    var inputGT = SAMPLES[0].GT;
    if (inputGT.indexOf('/') == -1 && inputGT.indexOf('|') == -1 ) {
      // No separator: duplicate the single allele into an unphased pair.
      SAMPLES[0].GT = inputGT + "/" + inputGT;
    } else if (inputGT.indexOf('/') != -1 && inputGT.split('/')[0] != inputGT.split('/')[1]) {
      // Unphased call with two different alleles.
      return false;
    } else if (inputGT.indexOf('|') != -1 && inputGT.split('|')[0] != inputGT.split('|')[1]) {
      // Phased call with two different alleles.
      return false;
    }
  }
}
# Copyright (C) The Lightning Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Runs `rtg vcffilter` on one VCF with a user-supplied JavaScript filter
# (for example change_gt_M.js or change_gt_chrM.js).
$namespaces:
  arv: "http://arvados.org/cwl#"
  cwltool: "http://commonwl.org/cwltool#"
cwlVersion: v1.0
class: CommandLineTool
label: Fix VCF by processing chrM
requirements:
  DockerRequirement:
    dockerPull: vcfutil
  ResourceRequirement:
    coresMin: 2
    ramMin: 8000
hints:
  arv:RuntimeConstraints:
    keep_cache: 4096
inputs:
  vcf:
    type: File
    label: Input VCF file
  filterjs:
    type: File
    label: Javascript code for filtering
outputs:
  fixedvcf:
    type: File
    label: Fixed VCF
    outputBinding:
      # The output is written under the input's basename (see "-o" below), so
      # this glob only matches when that basename ends in "vcf.gz".
      glob: "*vcf.gz"
    # NOTE(review): presumably rtg writes the .tbi index next to the output
    # itself; no separate tabix step exists in this tool -- confirm.
    secondaryFiles: [.tbi]
baseCommand: [rtg, vcffilter]
arguments:
  - prefix: "-i"
    valueFrom: $(inputs.vcf)
  # Basename only, so the result lands in the working directory.
  - prefix: "-o"
    valueFrom: $(inputs.vcf.basename)
  - prefix: "--javascript"
    valueFrom: $(inputs.filterjs)
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:61ee4a11243384b7f7dfc76291edcab8+85964/filtered_hu01F73B_var-GS000037833-ASM.vcf.gz +filterjs: + class: File + location: ../change_gt_M.js diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml new file mode 100644 index 0000000000..922bdc1908 --- /dev/null +++ b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-1kcgi.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:ff0df425ebf968cf38ec0574940040e2+122387 +filterjs: + class: File + location: ../change_gt_M.js diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml new file mode 100644 index 0000000000..52539596b0 --- /dev/null +++ b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-harvardpgpcgi.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:61ee4a11243384b7f7dfc76291edcab8+85964 +filterjs: + class: File + location: ../change_gt_M.js diff --git a/cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml new file mode 100644 index 0000000000..656ef63349 --- /dev/null +++ b/cwl/preprocess/chrmvcf/yml/fixchrm-wf-simons.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:73a438da5ff6bb4a3956ae1a05e574a0+84041 +filterjs: + class: File + location: ../change_gt_M.js diff --git a/cwl/preprocess/gvcf/filtercleangvcf-wf.cwl b/cwl/preprocess/gvcf/filtercleangvcf-wf.cwl new file mode 100644 index 0000000000..50f3016e1d --- /dev/null +++ b/cwl/preprocess/gvcf/filtercleangvcf-wf.cwl @@ -0,0 +1,48 @@ +# Copyright (C) The Lightning Authors. 
All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +cwlVersion: v1.0 +class: Workflow +label: Filters gVCFs by a specified quality cutoff and cleans +requirements: + ScatterFeatureRequirement: {} +hints: + arv:RuntimeConstraints: + keep_cache: 4096 + +inputs: + gvcfdir: + type: Directory + label: Input gVCF directory + cutoff: + type: int + label: Filtering cutoff threshold + keepgqdot: + type: boolean? + label: Flag for keeping GQ represented by "." + +outputs: + filteredcleangvcfs: + type: File[] + label: Filtered clean gVCFs + outputSource: filtercleangvcf/filteredcleangvcf + +steps: + getfiles: + run: getfiles.cwl + in: + gvcfdir: gvcfdir + out: [gvcfs] + + filtercleangvcf: + run: filtercleangvcf.cwl + scatter: gvcf + in: + gvcf: getfiles/gvcfs + keepgqdot: keepgqdot + cutoff: cutoff + out: [filteredcleangvcf] diff --git a/cwl/preprocess/gvcf/filtercleangvcf.cwl b/cwl/preprocess/gvcf/filtercleangvcf.cwl new file mode 100644 index 0000000000..a81351ddcd --- /dev/null +++ b/cwl/preprocess/gvcf/filtercleangvcf.cwl @@ -0,0 +1,75 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +cwlVersion: v1.0 +class: CommandLineTool +label: Filters gVCFs by a specified quality cutoff and cleans +requirements: + DockerRequirement: + dockerPull: arvados/l7g + ResourceRequirement: + coresMin: 2 + ramMin: 8000 + ShellCommandRequirement: {} +hints: + arv:RuntimeConstraints: + keep_cache: 4096 +inputs: + gvcf: + type: File + label: Input gVCF file + secondaryFiles: [.tbi] + filtergvcf: + type: File + label: Code that filters gVCFs + default: + class: File + location: src/filter-gvcf + cutoff: + type: int + label: Filtering cutoff threshold + keepgqdot: + type: boolean? + label: Flag for keeping GQ represented by "." 
+ cleanvcf: + type: File + label: Code that cleans gVCFs + default: + class: File + location: src/cleanvcf.py +outputs: + filteredcleangvcf: + type: File + label: Filtered and clean gVCF + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: zcat +arguments: + - $(inputs.gvcf) + - shellQuote: false + valueFrom: "|" + - $(inputs.filtergvcf) + - prefix: "-k" + valueFrom: $(inputs.keepgqdot) + - $(inputs.cutoff) + - shellQuote: false + valueFrom: "|" + - $(inputs.cleanvcf) + - shellQuote: false + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: false + valueFrom: ">" + - $(inputs.gvcf.nameroot).gz + - shellQuote: false + valueFrom: "&&" + - "tabix" + - prefix: "-p" + valueFrom: "vcf" + - $(inputs.gvcf.nameroot).gz diff --git a/cwl/preprocess/gvcf/getfiles.cwl b/cwl/preprocess/gvcf/getfiles.cwl new file mode 100644 index 0000000000..808da7363c --- /dev/null +++ b/cwl/preprocess/gvcf/getfiles.cwl @@ -0,0 +1,41 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+class: ExpressionTool
+label: Create list of gVCFs from directory
+cwlVersion: v1.0
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  gvcfdir:
+    type: Directory
+    label: Directory of input gVCFs
+outputs:
+  gvcfs:
+    type: File[]
+    label: Array of gvcfs
+    secondaryFiles: [.tbi]
+expression: |
+  ${
+    var gvcfs = [];
+    for (var i = 0; i < inputs.gvcfdir.listing.length; i++) {
+      var file = inputs.gvcfdir.listing[i];
+      if (file.nameext == '.gz') {  // every *.gz entry is treated as a gVCF
+        var main = file;
+        for (var j = 0; j < inputs.gvcfdir.listing.length; j++) {
+          var candidate = inputs.gvcfdir.listing[j];  // own name: `var` is function-scoped
+          if (candidate.basename == main.basename+".tbi") {
+            main.secondaryFiles = [candidate];  // attach the matching index
+          }
+        }
+        gvcfs.push(main);
+      }
+    }
+    return {"gvcfs": gvcfs};
+  }
diff --git a/cwl/preprocess/gvcf/src/cleanvcf.py b/cwl/preprocess/gvcf/src/cleanvcf.py
new file mode 100755
index 0000000000..c6fb0c5fa4
--- /dev/null
+++ b/cwl/preprocess/gvcf/src/cleanvcf.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+"""Drop VCF records that start inside a region already covered on the same
+chromosome; reads VCF text on stdin and writes the kept lines to stdout."""
+
+from __future__ import print_function
+import sys
+
+def is_header(line):
+    """Check if a line is header."""
+
+    return line.startswith('#')
+
+# FIELD index
+# CHROM 0, POS 1, REF 3
+
+def main():
+    """Copy stdin to stdout, skipping records that overlap earlier ones."""
+    previous_CHROM = ""
+    previous_end_POS = 0
+
+    for line in sys.stdin:
+        if not is_header(line):
+            fields = line.split('\t')
+            CHROM = fields[0]
+            POS = int(fields[1])
+            REF = fields[3]
+            if CHROM == previous_CHROM:
+                # Keep only records starting after the covered region.
+                if POS > previous_end_POS:
+                    print(line, end='')
+                # NOTE(review): nesting reconstructed from a whitespace-collapsed
+                # patch; confirm this update also runs for skipped records.
+                previous_end_POS = max(previous_end_POS, POS + len(REF) - 1)
+            else:
+                # First record of a new chromosome is always kept.
+                print(line, end='')
+                previous_end_POS = POS + len(REF) - 1
+            previous_CHROM = CHROM
+        else:
+            print(line, end='')
+
+if __name__ == '__main__':
+    main()
diff --git a/cwl/preprocess/gvcf/src/filter-gvcf b/cwl/preprocess/gvcf/src/filter-gvcf
new file mode 100755
index 0000000000..ff710fdb2a
--- /dev/null
+++ b/cwl/preprocess/gvcf/src/filter-gvcf
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+"""Filter a gVCF, read from a file or stdin, by the sample's GQ value."""
+
+import argparse
+import sys
+
+
+def filter_gvcf():
+    """Print header lines and the records whose GQ passes the threshold.
+
+    Records with no GQ key in FORMAT are kept, records whose GQ is "." are
+    kept only with -k/--keepGQdot, and lines with fewer than 10 columns are
+    dropped.
+    """
+
+    # setting up inputs
+    parser = argparse.ArgumentParser(prog="filter-gvcf", description="Filter a \
+        gVCF with a user-set quality threshold.")
+    parser.add_argument("-k", "--keepGQdot", help="Keeps the variant when GQ \
+        is '.'", action="store_true")
+    parser.add_argument("quality_threshold", metavar="QUALITY", help="Quality \
+        threshold", type=int)
+    parser.add_argument("gvcf", metavar="GVCF", nargs='?',
+        type=argparse.FileType('r'), default=sys.stdin, help="\
+        Input gVCF to filter from stdin")
+
+    args = parser.parse_args()
+    keepGQdot = args.keepGQdot
+    quality_threshold = args.quality_threshold
+    gvcf = args.gvcf
+
+    for line in gvcf:
+        line = line.strip()
+
+        # retain header and info lines
+        if len(line) == 0:
+            continue
+        if line[0] == '#':
+            print(line)
+            continue
+
+        fields = line.split('\t')
+
+        if len(fields) < 10:
+            continue
+
+        FORMAT_fields = fields[8].split(":")
+        sample_fields = fields[9].split(":")
+
+        # filter quality scores below the threshold
+        try:
+            GQ_index = FORMAT_fields.index('GQ')
+            GQ = sample_fields[GQ_index]
+            if GQ.isdigit():
+                if quality_threshold <= int(GQ):
+                    print(line)
+            elif GQ == "." and keepGQdot:
+                print(line)
+        except ValueError:
+            # FORMAT has no GQ key: keep the record unfiltered.
+            print(line)
+
+if __name__ == '__main__':
+    filter_gvcf()
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml
new file mode 100644
index 0000000000..80b6f873e9
--- /dev/null
+++ b/cwl/preprocess/gvcf/yml/filtercleangvcf-test.yml
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcf:
+  class: File
+  location: keep:2756bb4524567aebf16a002e94c2e407+14977/A-CUHS-CU010093-BL-COL-44045BL1.vcf.gz
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml
new file mode 100644
index 0000000000..ad7f570abd
--- /dev/null
+++ b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-1kcgi.yml
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:c9636e41a02d673ecfa52af1db1dbd6e+122394
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml
new file mode 100644
index 0000000000..619ddca012
--- /dev/null
+++ b/cwl/preprocess/gvcf/yml/filtercleangvcf-wf-vcfbed-test.yml
@@ -0,0 +1,8 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:3c770879900f335fe58be1d9e6b42420+1353
+cutoff: 20
diff --git a/cwl/preprocess/gvcf/yml/keepGQdot-test.yml b/cwl/preprocess/gvcf/yml/keepGQdot-test.yml
new file mode 100644
index 0000000000..78babbc120
--- /dev/null
+++ b/cwl/preprocess/gvcf/yml/keepGQdot-test.yml
@@ -0,0 +1,9 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+gvcfdir:
+  class: Directory
+  location: keep:47aa578f6360c9dfb10936d21571d6c7+1197
+cutoff: 20
+keepgqdot: true
diff --git a/cwl/preprocess/haploidvcf/change_gt.js b/cwl/preprocess/haploidvcf/change_gt.js
new file mode 100644
index 0000000000..fdbb659c72
--- /dev/null
+++ b/cwl/preprocess/haploidvcf/change_gt.js
@@ -0,0 +1,18 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Per-record callback handed to `rtg vcffilter --javascript` by fixgt.cwl.
+// A GT with no '/' or '|' (haploid) is rewritten as "a/a". On chrM, a GT
+// with a separator returns whether its two alleles are equal (presumably rtg
+// drops the record on false -- confirm against the RTG documentation).
+function record() {
+  var inputGT = SAMPLES[0].GT;
+  if (inputGT.indexOf('/') == -1 && inputGT.indexOf('|') == -1 ) {
+    SAMPLES[0].GT = inputGT + "/" + inputGT;
+  } else if (CHROM == 'chrM' && inputGT.indexOf('/') != -1) {
+    return inputGT.split('/')[0] == inputGT.split('/')[1];
+  } else if (CHROM == 'chrM' && inputGT.indexOf('|') != -1) {
+    return inputGT.split('|')[0] == inputGT.split('|')[1];
+  }
+}
diff --git a/cwl/preprocess/haploidvcf/fixgt-wf.cwl b/cwl/preprocess/haploidvcf/fixgt-wf.cwl
new file mode 100644
index 0000000000..54e576c539
--- /dev/null
+++ b/cwl/preprocess/haploidvcf/fixgt-wf.cwl
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to fix VCF by changing haploid calls and processing chrM
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  vcfdir:
+    type: Directory
+    label: Input VCF directory
+
+outputs:
+  fixedvcfs:
+    type: File[]
+    label: Fixed VCFs
+    outputSource: fixgt/fixedvcf
+
+steps:
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfdir
+    out: [vcfs]
+  fixgt:
+    run: fixgt.cwl
+    scatter: [vcf]
+    in:
+      vcf: getfiles/vcfs
+    out: [fixedvcf]
diff --git a/cwl/preprocess/haploidvcf/fixgt.cwl b/cwl/preprocess/haploidvcf/fixgt.cwl
new file mode 100644
index 0000000000..974e7647c2
--- /dev/null
+++ b/cwl/preprocess/haploidvcf/fixgt.cwl
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +cwlVersion: v1.0 +class: CommandLineTool +label: Fix VCF by changing haploid calls and processing chrM +requirements: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + coresMin: 2 + ramMin: 8000 + ShellCommandRequirement: {} +hints: + arv:RuntimeConstraints: + keep_cache: 4096 +inputs: + vcf: + type: File + label: Input VCF file + filterjs: + type: File + label: Javascript code for filtering + default: + class: File + location: change_gt.js +outputs: + fixedvcf: + type: File + label: Fixed VCF + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: zcat +arguments: + - $(inputs.vcf) + - shellQuote: False + valueFrom: "|" + - "grep" + - "-v" + - "Locus GQX is less than 6 for hom deletion" + - shellQuote: False + valueFrom: "|" + - "rtg" + - "vcffilter" + - prefix: "-i" + valueFrom: "-" + - prefix: "-o" + valueFrom: $(inputs.vcf.basename) + - prefix: "--javascript" + valueFrom: $(inputs.filterjs) diff --git a/cwl/preprocess/haploidvcf/getfiles.cwl b/cwl/preprocess/haploidvcf/getfiles.cwl new file mode 100644 index 0000000000..60dd2136c7 --- /dev/null +++ b/cwl/preprocess/haploidvcf/getfiles.cwl @@ -0,0 +1,28 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: ExpressionTool +label: Create list of VCFs to process +requirements: + InlineJavascriptRequirement: {} +inputs: + dir: + type: Directory + label: Input directory of VCFs +outputs: + vcfs: + type: File[] + label: Output VCFs +expression: | + ${ + var vcfs = []; + for (var i = 0; i < inputs.dir.listing.length; i++) { + var file = inputs.dir.listing[i]; + if (file.nameext == ".gz") { + vcfs.push(file); + } + } + return {"vcfs": vcfs}; + } diff --git a/cwl/preprocess/haploidvcf/yml/fixgt-test.yml b/cwl/preprocess/haploidvcf/yml/fixgt-test.yml new file mode 100644 index 0000000000..f96fc2e21b --- /dev/null +++ b/cwl/preprocess/haploidvcf/yml/fixgt-test.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:35e1d520788da317b9e51ebb52047eeb+28239/filtered_PGPC_0001_S1.genome.vcf.gz diff --git a/cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml b/cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml new file mode 100644 index 0000000000..8a76b318af --- /dev/null +++ b/cwl/preprocess/haploidvcf/yml/fixgt-wf-pgpcanada.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:35e1d520788da317b9e51ebb52047eeb+28239 diff --git a/cwl/preprocess/nonrefvcf/fixnonref-wf.cwl b/cwl/preprocess/nonrefvcf/fixnonref-wf.cwl new file mode 100644 index 0000000000..daf4d173b9 --- /dev/null +++ b/cwl/preprocess/nonrefvcf/fixnonref-wf.cwl @@ -0,0 +1,32 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to fix VCF by removing GT fields that point to <NON_REF> and processing chrM
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  vcfdir:
+    type: Directory
+    label: Input VCF directory
+
+outputs:
+  fixedvcfs:
+    type: File[]
+    label: Fixed VCFs
+    outputSource: fixnonref/fixedvcf
+
+steps:
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfdir
+    out: [vcfs]
+  fixnonref:
+    run: fixnonref.cwl
+    scatter: [vcf]
+    in:
+      vcf: getfiles/vcfs
+    out: [fixedvcf]
diff --git a/cwl/preprocess/nonrefvcf/fixnonref.cwl b/cwl/preprocess/nonrefvcf/fixnonref.cwl
new file mode 100644
index 0000000000..c32381cda3
--- /dev/null
+++ b/cwl/preprocess/nonrefvcf/fixnonref.cwl
@@ -0,0 +1,46 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Fix VCF by removing GT fields that point to <NON_REF> and processing chrM
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  filterjs:
+    type: File
+    label: Javascript code for filtering
+    default:
+      class: File
+      location: ../chrmvcf/change_gt_chrM.js  # NOTE(review): chrmvcf/yml/*.yml use ../change_gt_M.js; confirm which file name exists
+outputs:
+  fixedvcf:
+    type: File
+    label: Fixed VCF
+    outputBinding:
+      glob: "*vcf.gz"
+    secondaryFiles: [.tbi]
+baseCommand: [rtg, vcffilter]
+arguments:
+  - prefix: "-i"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-o"
+    valueFrom: $(inputs.vcf.basename)
+  - prefix: "--keep-expr"  # keep single-ALT records and records whose GT does not contain the last ALT index
+    valueFrom: "ALT.length == 1 || SAMPLES[0].GT.indexOf(String(ALT.length)) == -1"
+  - prefix: "--javascript"
+    valueFrom: $(inputs.filterjs)
diff --git a/cwl/preprocess/nonrefvcf/getfiles.cwl b/cwl/preprocess/nonrefvcf/getfiles.cwl
new file mode 100644
index 0000000000..60dd2136c7
--- /dev/null
+++
b/cwl/preprocess/nonrefvcf/getfiles.cwl @@ -0,0 +1,28 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: ExpressionTool +label: Create list of VCFs to process +requirements: + InlineJavascriptRequirement: {} +inputs: + dir: + type: Directory + label: Input directory of VCFs +outputs: + vcfs: + type: File[] + label: Output VCFs +expression: | + ${ + var vcfs = []; + for (var i = 0; i < inputs.dir.listing.length; i++) { + var file = inputs.dir.listing[i]; + if (file.nameext == ".gz") { + vcfs.push(file); + } + } + return {"vcfs": vcfs}; + } diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml new file mode 100644 index 0000000000..b2ee0dd295 --- /dev/null +++ b/cwl/preprocess/nonrefvcf/yml/fixnonref-test.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:23c2bc9a1a35af43809dd96bf5a70f36+1061196/filtered_HG00403.haplotypeCalls.er.raw.vcf.gz diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml new file mode 100644 index 0000000000..da9ad5c5ec --- /dev/null +++ b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-1kgvcf.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:23c2bc9a1a35af43809dd96bf5a70f36+1061196 diff --git a/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml new file mode 100644 index 0000000000..e90419b5d6 --- /dev/null +++ b/cwl/preprocess/nonrefvcf/yml/fixnonref-wf-ukpgp.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:e0ca3616f52f921a1e1cfc4952935a5e+588124 diff --git a/cwl/preprocess/portablevcf/bcftools-annotate.cwl b/cwl/preprocess/portablevcf/bcftools-annotate.cwl new file mode 100644 index 0000000000..4c22378af5 --- /dev/null +++ b/cwl/preprocess/portablevcf/bcftools-annotate.cwl @@ -0,0 +1,38 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: CommandLineTool +label: Remove unused annotations +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil +inputs: + vcfgz: + type: File + label: Input VCF +outputs: + annotatedvcfgz: + type: File + label: Annotated VCF + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [bcftools, annotate] +arguments: + - prefix: "-x" + valueFrom: "INFO/customer_score1,INFO/customer_score2,INFO/ADP,INFO/ADP,INFO/HET,INFO/HOM,INFO/NC,INFO/WT,FORMAT/AO,FORMAT/GL,FORMAT/QA,FORMAT/SDP,FORMAT/RD,FORMAT/AD,FORMAT/FREQ,FORMAT/PVAL,FORMAT/RBQ,FORMAT/ABQ,FORMAT/RDF,FORMAT/RDR,FORMAT/ADF,FORMAT/ADR" + - $(inputs.vcfgz) + - prefix: "-O" + valueFrom: "z" + - prefix: "-o" + valueFrom: $(inputs.vcfgz.basename) + - shellQuote: False + valueFrom: "&&" + - "tabix" + - prefix: "-p" + valueFrom: "vcf" + - $(inputs.vcfgz.basename) diff --git a/cwl/preprocess/portablevcf/bcftools-reheader.cwl b/cwl/preprocess/portablevcf/bcftools-reheader.cwl new file mode 100644 index 0000000000..b25478ae8e --- /dev/null +++ b/cwl/preprocess/portablevcf/bcftools-reheader.cwl @@ -0,0 +1,30 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: CommandLineTool +label: Change the header of VCF +hints: + DockerRequirement: + dockerPull: vcfutil +inputs: + header: + type: File + label: Header file + vcfgz: + type: File + label: Input VCF +outputs: + reheaderedvcfgz: + type: File + label: Reheadered VCF + outputBinding: + glob: "*vcf.gz" +baseCommand: [bcftools, reheader] +arguments: + - prefix: "-h" + valueFrom: $(inputs.header) + - $(inputs.vcfgz) + - prefix: "-o" + valueFrom: $(inputs.vcfgz.basename) diff --git a/cwl/preprocess/portablevcf/cat.cwl b/cwl/preprocess/portablevcf/cat.cwl new file mode 100644 index 0000000000..5db8321d7d --- /dev/null +++ b/cwl/preprocess/portablevcf/cat.cwl @@ -0,0 +1,22 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: CommandLineTool +label: Concatenate files +hints: + DockerRequirement: + dockerPull: vcfutil +inputs: + txts: + type: File[] + label: Text files +outputs: + cattxt: + type: stdout + label: Concatenated text +baseCommand: cat +arguments: + - $(inputs.txts) +stdout: catsummary.txt diff --git a/cwl/preprocess/portablevcf/getfiles.cwl b/cwl/preprocess/portablevcf/getfiles.cwl new file mode 100644 index 0000000000..04b48a2c66 --- /dev/null +++ b/cwl/preprocess/portablevcf/getfiles.cwl @@ -0,0 +1,28 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: ExpressionTool +label: Create list of VCFs to process +requirements: + InlineJavascriptRequirement: {} +inputs: + dir: + type: Directory + label: Input directory of VCFs +outputs: + vcfgzs: + type: File[] + label: Output VCFs +expression: | + ${ + var vcfgzs = []; + for (var i = 0; i < inputs.dir.listing.length; i++) { + var file = inputs.dir.listing[i]; + if (file.nameext == ".gz") { + vcfgzs.push(file); + } + } + return {"vcfgzs": vcfgzs}; + } diff --git a/cwl/preprocess/portablevcf/header b/cwl/preprocess/portablevcf/header new file mode 100644 index 0000000000..cd010b3384 --- /dev/null +++ b/cwl/preprocess/portablevcf/header @@ -0,0 +1,104 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +##fileformat=VCFv4.1 +##FILTER= +##reference=/tmp/crunch-job-task-work/compute85.1/tmpdir/tmphHPL2y/stgdaa8fa7e-a7e3-431f-8adb-c23c092f628c/hg19.fa +##phasing=nonecontig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT freebayes diff --git a/cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl b/cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl new file mode 100644 index 0000000000..c808799a71 --- /dev/null +++ b/cwl/preprocess/portablevcf/preprocess-portablevcf-wf.cwl @@ -0,0 +1,69 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.0 +class: Workflow +label: Preprocess portable VCF +requirements: + arv:RunInSingleContainer: {} +hints: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + ramMin: 12000 + +inputs: + vcfgz: + type: File + label: Input VCF + header: + type: File + label: Header file + sdf: + type: Directory + label: RTG reference directory + cleanvcf: + type: File + label: Code that cleans VCFs + +outputs: + processedvcfgz: + type: File + label: Processed VCF + outputSource: bcftools-annotate/annotatedvcfgz + summary: + type: File + label: Summary file + outputSource: rtg-vcfeval/summary + +steps: + bcftools-reheader: + run: bcftools-reheader.cwl + in: + header: header + vcfgz: vcfgz + out: [reheaderedvcfgz] + + sort-clean: + run: sort-clean.cwl + in: + vcfgz: bcftools-reheader/reheaderedvcfgz + cleanvcf: cleanvcf + out: [cleanvcfgz] + + bcftools-annotate: + run: bcftools-annotate.cwl + in: + vcfgz: sort-clean/cleanvcfgz + out: [annotatedvcfgz] + + rtg-vcfeval: + run: rtg-vcfeval.cwl + in: + baselinevcfgz: bcftools-annotate/annotatedvcfgz + callsvcfgz: bcftools-annotate/annotatedvcfgz + sdf: sdf + out: [summary] diff --git a/cwl/preprocess/portablevcf/rtg-vcfeval.cwl b/cwl/preprocess/portablevcf/rtg-vcfeval.cwl new file mode 100644 index 0000000000..3244ae1b5e --- /dev/null +++ b/cwl/preprocess/portablevcf/rtg-vcfeval.cwl @@ -0,0 +1,38 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: RTG vcfeval to compare VCFs
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+inputs:
+  baselinevcfgz:
+    type: File
+    label: Baseline VCF
+    secondaryFiles: [.tbi]
+  callsvcfgz:
+    type: File
+    label: Calls VCF
+    secondaryFiles: [.tbi]
+  sdf:
+    type: Directory
+    label: RTG reference directory
+outputs:
+  summary:
+    type: File
+    label: Summary file
+    outputBinding:
+      glob: "eval/summary.txt"
+baseCommand: [rtg, vcfeval]
+arguments:
+  - prefix: "-b"
+    valueFrom: $(inputs.baselinevcfgz)
+  - prefix: "-c"
+    valueFrom: $(inputs.callsvcfgz)
+  - prefix: "-t"
+    valueFrom: $(inputs.sdf)
+  - prefix: "-o"
+    valueFrom: "eval"
diff --git a/cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl b/cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl
new file mode 100644
index 0000000000..845cc9b93b
--- /dev/null
+++ b/cwl/preprocess/portablevcf/scatter-preprocess-portablevcf-wf.cwl
@@ -0,0 +1,65 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to process portable VCFs
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: vcfutil
+
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Input directory of VCFs
+  header:
+    type: File
+    label: Header file
+    default:
+      class: File
+      location: header
+  sdf:
+    type: Directory
+    label: RTG reference directory
+  cleanvcf:
+    type: File
+    label: Code that cleans VCFs
+    default:
+      class: File
+      location: ../gvcf/src/cleanvcf.py
+
+outputs:
+  processedvcfgzs:
+    type: File[]
+    label: Processed VCFs
+    outputSource: preprocess-portablevcf-wf/processedvcfgz
+  summary:
+    type: File
+    label: Concatenated vcfeval summaries
+    outputSource: cat/cattxt
+
+steps:
+  getfiles:
+    run: getfiles.cwl
+    in:
+      dir: vcfsdir
+    out: [vcfgzs]
+  preprocess-portablevcf-wf:
+    run: preprocess-portablevcf-wf.cwl
+    scatter: vcfgz
+    in:
+      vcfgz: getfiles/vcfgzs
+      header: header
+      sdf: sdf
+      cleanvcf: cleanvcf
+    out: [processedvcfgz, summary]
+  # Concatenate the per-VCF vcfeval summaries; exposed as the `summary` output.
+  cat:
+    run: cat.cwl
+    in:
+      txts: preprocess-portablevcf-wf/summary
+    out: [cattxt]
diff --git a/cwl/preprocess/portablevcf/sort-clean.cwl b/cwl/preprocess/portablevcf/sort-clean.cwl
new file mode 100644
index 0000000000..3d8fbea0a2
--- /dev/null
+++ b/cwl/preprocess/portablevcf/sort-clean.cwl
@@ -0,0 +1,46 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +cwlVersion: v1.0 +class: CommandLineTool +label: Sort VCF and clean duplicate calls +requirements: + ShellCommandRequirement: {} +hints: + DockerRequirement: + dockerPull: vcfutil +inputs: + cleanvcf: + type: File + label: Code that cleans VCFs + vcfgz: + type: File + label: Input VCF +outputs: + cleanvcfgz: + type: File + label: Clean VCF + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: vcf-sort +arguments: + - "-c" + - $(inputs.vcfgz) + - shellQuote: False + valueFrom: "|" + - $(inputs.cleanvcf) + - shellQuote: False + valueFrom: "|" + - "bgzip" + - "-c" + - shellQuote: False + valueFrom: ">" + - $(inputs.vcfgz.basename) + - shellQuote: False + valueFrom: "&&" + - "tabix" + - prefix: "-p" + valueFrom: "vcf" + - $(inputs.vcfgz.basename) diff --git a/cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml b/cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml new file mode 100644 index 0000000000..1a15c7d93f --- /dev/null +++ b/cwl/preprocess/portablevcf/yml/bcftools-annotate-test.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfgz: + class: File + location: keep:b0571bd9751df8769ec145289f939685+479/hu007B82_1YB27IM-portable.vcf.gz diff --git a/cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml b/cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml new file mode 100644 index 0000000000..e736c83052 --- /dev/null +++ b/cwl/preprocess/portablevcf/yml/preprocess-portablevcf-wf.yml @@ -0,0 +1,16 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcfgz: + class: File + location: keep:9f88a818cbc5b7b6614749c5508226d3+37493/hu007B82_1YB27IM-portable.vcf.gz +sdf: + class: Directory + location: keep:6cf48f302f6615f44002c6df07852778+1240 +header: + class: File + location: ../header +cleanvcf: + class: File + location: ../../gvcf/src/cleanvcf.py diff --git a/cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml b/cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml new file mode 100644 index 0000000000..0b57cd4e51 --- /dev/null +++ b/cwl/preprocess/portablevcf/yml/scatter-preprocess-portablevcf-wf.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfsdir: + class: Directory + location: keep:9f88a818cbc5b7b6614749c5508226d3+37493 +sdf: + class: Directory + location: keep:6cf48f302f6615f44002c6df07852778+1240 diff --git a/cwl/preprocess/portablevcf/yml/sort-clean.cwl b/cwl/preprocess/portablevcf/yml/sort-clean.cwl new file mode 100644 index 0000000000..b68d924a08 --- /dev/null +++ b/cwl/preprocess/portablevcf/yml/sort-clean.cwl @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfgz: + class: File + location: keep:9f88a818cbc5b7b6614749c5508226d3+37493/hu007B82_1YB27IM-portable.vcf.gz +cleanvcf: + class: File + location: ../../gvcf/src/cleanvcf.py diff --git a/cwl/preprocess/simons/filter-vcf.cwl b/cwl/preprocess/simons/filter-vcf.cwl new file mode 100644 index 0000000000..d155ca5ec2 --- /dev/null +++ b/cwl/preprocess/simons/filter-vcf.cwl @@ -0,0 +1,53 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" +cwlVersion: v1.0 +class: CommandLineTool +label: Filters VCF by a specified QUAL and GQ cutoff +requirements: + DockerRequirement: + dockerPull: vcfutil + ResourceRequirement: + coresMin: 2 + ramMin: 8000 + ShellCommandRequirement: {} +hints: + arv:RuntimeConstraints: + keep_cache: 4096 +inputs: + vcf: + type: File + label: Input VCF file + sample: + type: string + label: Sample name of VCF + qualcutoff: + type: int + label: Filtering QUAL cutoff + gqcutoff: + type: int + label: Filtering GQ cutoff +outputs: + filteredvcf: + type: File + label: Filtered VCF + outputBinding: + glob: "*vcf.gz" + secondaryFiles: [.tbi] +baseCommand: [bcftools, view] +arguments: + - "-Oz" + - prefix: "-o" + valueFrom: $(inputs.sample).vcf.gz + - prefix: "-e" + valueFrom: "QUAL<$(inputs.qualcutoff) | QUAL='.' | FORMAT/GQ<$(inputs.gqcutoff)" + - $(inputs.vcf) + - shellQuote: false + valueFrom: "&&" + - "tabix" + - prefix: "-p" + valueFrom: "vcf" + - $(inputs.sample).vcf.gz diff --git a/cwl/preprocess/simons/getfiles.cwl b/cwl/preprocess/simons/getfiles.cwl new file mode 100644 index 0000000000..7f1ba165c4 --- /dev/null +++ b/cwl/preprocess/simons/getfiles.cwl @@ -0,0 +1,34 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: ExpressionTool
+label: Create list of VCFs and sample names
+inputs:
+  dir:  # only the entries of this directory's listing are examined
+    type: Directory
+    label: Input directory of VCFs
+outputs:
+  vcfs:  # every listing entry whose extension is ".gz"
+    type: File[]
+    label: Output VCFs
+  samples:  # same order as vcfs: each file's basename up to its first "."
+    type: string[]
+    label: Sample names of VCFs
+requirements:
+  InlineJavascriptRequirement: {}  # needed to evaluate the ${...} body below
+expression: |
+  ${
+    var vcfs = [];
+    var samples = [];
+    for (var i = 0; i < inputs.dir.listing.length; i++) {
+      var file = inputs.dir.listing[i];
+      if (file.nameext == ".gz") {
+        vcfs.push(file);
+        var sample = file.basename.split(".")[0];
+        samples.push(sample);
+      }
+    }
+    return {"vcfs": vcfs, "samples": samples};
+  }
diff --git a/cwl/preprocess/simons/make-bed.cwl b/cwl/preprocess/simons/make-bed.cwl
new file mode 100644
index 0000000000..6d688e540b
--- /dev/null
+++ b/cwl/preprocess/simons/make-bed.cwl
@@ -0,0 +1,58 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Make BED from VCF for regions passing a specified QUAL and GQ cutoff
+requirements:
+  DockerRequirement:
+    dockerPull: vcfutil
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 22000
+  ShellCommandRequirement: {}  # needed for the unquoted "|" arguments below
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  vcf:
+    type: File
+    label: Input VCF file
+  sample:  # only used to name the output file: <sample>.bed
+    type: string
+    label: Sample name of VCF
+  qualcutoff:
+    type: int
+    label: Filtering QUAL cutoff
+  gqcutoff:
+    type: int
+    label: Filtering GQ cutoff
+outputs:
+  bed:
+    type: stdout  # the pipeline's standard output, captured under the name given by "stdout" below
+    label: BED for regions that pass cutoff
+baseCommand: [bcftools, view]
+arguments:  # builds: bcftools view -e EXPR vcf | convert2bed -i vcf -d | cut -f1-3 | bedtools merge -i -
+  - prefix: "-e"  # -e takes an exclusion expression: records matching it are dropped
+    valueFrom: "QUAL<$(inputs.qualcutoff) | QUAL='.' | FORMAT/GQ<$(inputs.gqcutoff)"
+  - $(inputs.vcf)
+  - shellQuote: false  # pass the pipe through to the shell unquoted
+    valueFrom: "|"
+  - "convert2bed"
+  - prefix: "-i"
+    valueFrom: "vcf"
+  - "-d"
+  - shellQuote: false
+    valueFrom: "|"
+  - "cut"
+  - "-f1-3"  # keep the first three columns only
+  - shellQuote: false
+    valueFrom: "|"
+  - "bedtools"
+  - "merge"
+  - prefix: "-i"
+    valueFrom: "-"  # input comes from the preceding pipe
+stdout: $(inputs.sample).bed
diff --git a/cwl/preprocess/simons/make-vcf-bed-wf.cwl b/cwl/preprocess/simons/make-vcf-bed-wf.cwl
new file mode 100644
index 0000000000..d14f17ff9f
--- /dev/null
+++ b/cwl/preprocess/simons/make-vcf-bed-wf.cwl
@@ -0,0 +1,64 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+cwlVersion: v1.0
+class: Workflow
+label: Scatter to filter VCF and make BED region
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  variantsvcfdir:  # feeds the filter-vcf branch
+    type: Directory
+    label: Input variants only VCF directory
+  fullvcfdir:  # feeds the make-bed branch
+    type: Directory
+    label: Input full VCF directory
+  qualcutoff:  # passed unchanged to both filter-vcf and make-bed
+    type: int
+    label: Filtering QUAL cutoff
+  gqcutoff:  # passed unchanged to both filter-vcf and make-bed
+    type: int
+    label: Filtering GQ cutoff
+
+outputs:
+  filteredvcfs:
+    type: File[]
+    label: Output VCFs
+    outputSource: filter-vcf/filteredvcf
+  beds:
+    type: File[]
+    label: Output BEDs
+    outputSource: make-bed/bed
+
+steps:
+  getvariantsvcfs:  # lists the .gz files and sample names in variantsvcfdir
+    run: getfiles.cwl
+    in:
+      dir: variantsvcfdir
+    out: [vcfs, samples]
+  getfullvcfs:  # same listing step, applied to fullvcfdir
+    run: getfiles.cwl
+    in:
+      dir: fullvcfdir
+    out: [vcfs, samples]
+  filter-vcf:
+    run: filter-vcf.cwl
+    scatter: [vcf, sample]
+    scatterMethod: dotproduct  # pairs vcf[i] with sample[i]; both arrays come from the same getfiles run
+    in:
+      vcf: getvariantsvcfs/vcfs
+      sample: getvariantsvcfs/samples
+      qualcutoff: qualcutoff
+      gqcutoff: gqcutoff
+    out: [filteredvcf]
+  make-bed:
+    run: make-bed.cwl
+    scatter: [vcf, sample]
+    scatterMethod: dotproduct  # pairs vcf[i] with sample[i]; both arrays come from the same getfiles run
+    in:
+      vcf: getfullvcfs/vcfs
+      sample: getfullvcfs/samples
+      qualcutoff: qualcutoff
+      gqcutoff: gqcutoff
+    out: [bed]
diff --git a/cwl/preprocess/simons/yml/filter-vcf-test.yml b/cwl/preprocess/simons/yml/filter-vcf-test.yml
new file mode 100644
index
0000000000..c944e85ced --- /dev/null +++ b/cwl/preprocess/simons/yml/filter-vcf-test.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:c43692b61030dc10d24d84ffc97ff888+157254/LP6005441-DNA_A01.annotated.nh2.variants.vcf.gz +sample: "LP6005441-DNA_A01" +qualcutoff: 20 +gqcutoff: 20 diff --git a/cwl/preprocess/simons/yml/make-bed-test.yml b/cwl/preprocess/simons/yml/make-bed-test.yml new file mode 100644 index 0000000000..1e21beb8b4 --- /dev/null +++ b/cwl/preprocess/simons/yml/make-bed-test.yml @@ -0,0 +1,10 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcf: + class: File + location: keep:8452af084a85eb3b49ab5d77137b7e0a+4903477/LP6005441-DNA_A01.annotated.nh.vcf.gz +sample: "LP6005441-DNA_A01" +qualcutoff: 20 +gqcutoff: 20 diff --git a/cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml b/cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml new file mode 100644 index 0000000000..161bf8f193 --- /dev/null +++ b/cwl/preprocess/simons/yml/make-vcf-bed-wf-simons.yml @@ -0,0 +1,12 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +variantsvcfdir: + class: Directory + location: keep:c43692b61030dc10d24d84ffc97ff888+157254 +fullvcfdir: + class: Directory + location: keep:8452af084a85eb3b49ab5d77137b7e0a+4903477 +qualcutoff: 20 +gqcutoff: 20 diff --git a/cwl/preprocess/splitvcf/concatvcf-wf.cwl b/cwl/preprocess/splitvcf/concatvcf-wf.cwl new file mode 100644 index 0000000000..3bffa7fc11 --- /dev/null +++ b/cwl/preprocess/splitvcf/concatvcf-wf.cwl @@ -0,0 +1,33 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: Workflow
+label: Concatenate a set of VCFs split by chromosomes
+requirements:
+  ScatterFeatureRequirement: {}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: shallow_listing
+inputs:
+  vcfdirs:  # one directory per scatter job; each is handed to concatvcf.cwl as vcfdir
+    type: Directory[]
+    label: Input VCFs directories
+
+outputs:
+  vcfs:  # one concatenated VCF per input directory
+    type: File[]
+    label: Concatenated VCFs
+    outputSource: concatvcf/vcf
+    secondaryFiles: [.tbi]
+
+steps:
+  concatvcf:
+    run: concatvcf.cwl
+    scatter: vcfdir
+    in:
+      vcfdir: vcfdirs
+    out: [vcf]
diff --git a/cwl/preprocess/splitvcf/concatvcf.cwl b/cwl/preprocess/splitvcf/concatvcf.cwl
new file mode 100644
index 0000000000..efddfef72e
--- /dev/null
+++ b/cwl/preprocess/splitvcf/concatvcf.cwl
@@ -0,0 +1,40 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Concatenate VCFs split by chromosomes
+requirements:
+  DockerRequirement:
+    dockerPull: arvados/l7g
+  ResourceRequirement:
+    coresMin: 2
+    ramMin: 8000
+hints:
+  arv:RuntimeConstraints:
+    keep_cache: 4096
+inputs:
+  bashscript:  # defaults to the script shipped next to this tool, so callers normally omit it
+    type: File
+    label: Master script to concatenate VCFs
+    default:
+      class: File
+      location: src/concatvcf.sh
+  vcfdir:
+    type: Directory
+    label: Input VCFs directory
+outputs:
+  vcf:
+    type: File
+    label: Concatenated VCF
+    outputBinding:
+      glob: "*vcf.gz"  # the script writes <sample>.vcf.gz into the working directory
+    secondaryFiles: [.tbi]  # index written by the script's tabix call
+baseCommand: bash
+arguments:  # runs: bash <bashscript> <vcfdir>
+  - $(inputs.bashscript)
+  - $(inputs.vcfdir)
diff --git a/cwl/preprocess/splitvcf/src/concatvcf.sh b/cwl/preprocess/splitvcf/src/concatvcf.sh
new file mode 100755
index 0000000000..57e8150dba
--- /dev/null
+++ b/cwl/preprocess/splitvcf/src/concatvcf.sh
@@ -0,0 +1,20 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +#!/bin/bash + +set -e +set -o pipefail + +vcfdir="$1" + +vcfchr1=`ls $vcfdir/*.chr1.*` +sample=`basename $vcfchr1 | cut -d '.' -f 1` +chroms=(chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM) + +files=$(for chrom in ${chroms[@]}; do echo "$vcfdir/$sample.raw_variants.$chrom.g.vcf.gz"; done) +echo "files: ${files[@]}" + +bcftools concat ${files[@]} -n -O z -o $sample.vcf.gz +tabix $sample.vcf.gz diff --git a/cwl/preprocess/splitvcf/yml/concatvcf-test.yml b/cwl/preprocess/splitvcf/yml/concatvcf-test.yml new file mode 100644 index 0000000000..578179c70c --- /dev/null +++ b/cwl/preprocess/splitvcf/yml/concatvcf-test.yml @@ -0,0 +1,7 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdir: + class: Directory + location: keep:d24d44b265ae3e38799c4f7301cbf6c3+5241 diff --git a/cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml b/cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml new file mode 100644 index 0000000000..35eed0fa6a --- /dev/null +++ b/cwl/preprocess/splitvcf/yml/concatvcf-wf-test.yml @@ -0,0 +1,461 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+# +# SPDX-License-Identifier: AGPL-3.0 + +vcfdirs: + - class: Directory + location: keep:d24d44b265ae3e38799c4f7301cbf6c3+5241 + - class: Directory + location: keep:4618438a7ceaca560a5cfa1823030667+4485 + - class: Directory + location: keep:ac1e06ab75d3349583f056352b71889b+5539 + - class: Directory + location: keep:ad6fec9bdd7350310803d237219b749d+4136 + - class: Directory + location: keep:0055b1f52ad49c4a22e50c64eaadbc1a+4987 + - class: Directory + location: keep:193fa6830c3a1fc4d96921823241cb2a+5660 + - class: Directory + location: keep:733fae4f0d7d054d02af264c751d18a7+4400 + - class: Directory + location: keep:f4957d5445b1dd29a642dd83adb674b0+4180 + - class: Directory + location: keep:80f588645ab70600e4f5a6ac388d99c7+3493 + - class: Directory + location: keep:6a3f10cc1cf6fe4e75ffa33c89e69a13+5536 + - class: Directory + location: keep:bda45fe62339ca89e52d1291e6567323+3497 + - class: Directory + location: keep:ba0b6ba37ecfec399ac95585ad4ec6ce+4696 + - class: Directory + location: keep:38782fb12161a93e8726d51689e472f8+4779 + - class: Directory + location: keep:65d5701bd4bd5507c5ab8aaaf85b5ba2+4400 + - class: Directory + location: keep:13fcaad3d7d3adb74ebcc0a0e6f027af+4096 + - class: Directory + location: keep:2365ad35f1000ce1fbd4a50fb5fcffb3+6462 + - class: Directory + location: keep:db4c9a379ce7ac19d2d1f1c822e0eeba+3970 + - class: Directory + location: keep:166c4f7b57bfd7cd99ff098c2a3ac52e+5032 + - class: Directory + location: keep:187f854ea63b95995fd3f548fa23a997+4482 + - class: Directory + location: keep:75acc717e6decb8255da3d96cbace070+5328 + - class: Directory + location: keep:fda99bbdf0ac9943811feef5431a57ff+5114 + - class: Directory + location: keep:567fab24ecb57ea41ca8a506479d27b8+6126 + - class: Directory + location: keep:ad3781bb7980f24e9c22382b88f7c813+3586 + - class: Directory + location: keep:8ca5bf1bb1af0af31e922a80158cdf3f+4357 + - class: Directory + location: keep:ea30c0ae929a417580151a1500d75e5f+5451 + - class: Directory + location: 
keep:b7dae07470bd05aa59bac28f3207640e+5112 + - class: Directory + location: keep:d899dfb98321bf816898f96fc10e78b2+5538 + - class: Directory + location: keep:50fb97d70e0a6f752dd68fd43052c9db+5241 + - class: Directory + location: keep:6141abb4574a7702e7784da17def366c+3674 + - class: Directory + location: keep:632b73968d6d42b238284a3fbd500801+4401 + - class: Directory + location: keep:b93604163adcc1fd6e468f001cc12106+3544 + - class: Directory + location: keep:1f3d41e84cc7e0d8bc0888aae7acc5bf+5453 + - class: Directory + location: keep:6b26815278d0121a3b1c197e4481d320+4444 + - class: Directory + location: keep:695861a88de1590e44d6c672a3df1be2+4013 + - class: Directory + location: keep:8d69c651e900f20bc8ba899d4b245b9b+4866 + - class: Directory + location: keep:ea298b58ea1ed4e94165fe030639ff8c+4184 + - class: Directory + location: keep:a03b834c8898256fd462eda2feb2e83f+6171 + - class: Directory + location: keep:294f4c6c20f3b7f305f2f2585455d596+4014 + - class: Directory + location: keep:f8606ca857ff0a450a0d18a623b1f3b8+5746 + - class: Directory + location: keep:14d0b9b59ea6b2e5828b96101eb7a189+4229 + - class: Directory + location: keep:0f41c29dae36dccf866e0aa0df624223+4733 + - class: Directory + location: keep:ca65d1982e7698639bf8e851de57c95f+6039 + - class: Directory + location: keep:f6143cc3b4a208a8b050358c5075feaa+4267 + - class: Directory + location: keep:86e8014321d6e64f868eb956ee33279d+6040 + - class: Directory + location: keep:5bdc4afcb1b5a48d4a213ef9d14f5b66+5537 + - class: Directory + location: keep:8bbe0f33b1f96cc448b73ea29b13fa05+3970 + - class: Directory + location: keep:ad2619b369001e92c6a8929112a3e802+6463 + - class: Directory + location: keep:594969732a5a787581d84b12415ea6a1+3967 + - class: Directory + location: keep:048a2e05e2128f05b50d8c7e8667cccd+5497 + - class: Directory + location: keep:76e77ceaa3dde8a31c0fd70c4b212ed3+6127 + - class: Directory + location: keep:628cf8d9caf1402cef7d6a746d57e3a0+5829 + - class: Directory + location: 
keep:d9c637434673f267135aba455075153d+4357 + - class: Directory + location: keep:221c4c7f0fd0d37e72eaf45c164cd783+4265 + - class: Directory + location: keep:912f3aa2f9ee24b189af41732452de48+5498 + - class: Directory + location: keep:0af97b78c139b51d72bfbd6dadbf81c9+4566 + - class: Directory + location: keep:4f740097a65fc1dca17fba44af9da3b0+3844 + - class: Directory + location: keep:354f3af52a35e75412315c3b19d956d8+7220 + - class: Directory + location: keep:36d66871b43209c87e69ea85a8b4c5c9+6084 + - class: Directory + location: keep:830d48b689c3b8582573ca3b9b569500+5412 + - class: Directory + location: keep:60850336a290364e609caa96f72fc70a+5999 + - class: Directory + location: keep:5bca8b3256ed22250242caa1a01611c8+5956 + - class: Directory + location: keep:84de811ea49ad0153db5606a2566ce3b+5071 + - class: Directory + location: keep:f7a3295100c5c5735a23795fbdaf6034+4862 + - class: Directory + location: keep:54b87549459758a414deb9eac38aa841+4522 + - class: Directory + location: keep:641a9ca375fd46970aae5ca06be39041+5114 + - class: Directory + location: keep:16fb96918f11cb9af55b94ba30f741b3+3588 + - class: Directory + location: keep:011aecfb2bf24d5cc628abad09328fd4+3280 + - class: Directory + location: keep:9b0af97106d50e81d577d7782ca6350e+5911 + - class: Directory + location: keep:2537e1fb26425a3bce7c04a33fd618f6+5497 + - class: Directory + location: keep:9cd3e4e95784b22244e6fd4252112e18+4358 + - class: Directory + location: keep:05da96299af677ce5b719bb58dbc6ad6+3592 + - class: Directory + location: keep:c258e1121c65c6a98efdf891f262de55+4736 + - class: Directory + location: keep:1e4a75104b98922d9c9b8f2bc864f148+5866 + - class: Directory + location: keep:e009fcb61ebe9a2f602db344730041d6+5454 + - class: Directory + location: keep:2961d6b5887f3cff5cc9452f200f3b18+5281 + - class: Directory + location: keep:5b45e3c046c74c45a8f868ae5d14bc56+3545 + - class: Directory + location: keep:cbd66172ac3a4756c2bcf76c1d565840+5455 + - class: Directory + location: 
keep:fb176f2e05f99d0d814e1326fdcc9485+4141 + - class: Directory + location: keep:cf33d92e949057ebea253299f11abeda+5622 + - class: Directory + location: keep:289329596a74adbc9d390a25b404db8f+4315 + - class: Directory + location: keep:c41fd5161e38b30e639502f8df246599+4569 + - class: Directory + location: keep:d23b4ad254c7e0fae82ad78776567089+5327 + - class: Directory + location: keep:d585d21b452860412547931b09338299+5914 + - class: Directory + location: keep:a2d9dd61e935f127afa7605519356c69+5202 + - class: Directory + location: keep:3a50023554d102be1af7a68307777de4+3799 + - class: Directory + location: keep:f994d727ced1c46f50019a3474c0a4f2+3672 + - class: Directory + location: keep:4462d85bb04cdd49d17da80f6bfd776a+5620 + - class: Directory + location: keep:bc5d9debc9c3a8e254d737666075ba6e+4567 + - class: Directory + location: keep:5ec91c7a86862bd77683d92b8c740f45+4611 + - class: Directory + location: keep:3ff29c24ffc69353a7400323f901192f+3459 + - class: Directory + location: keep:db20379681c4f771a2abd95f0e2f787e+5201 + - class: Directory + location: keep:9921901515f0d9c8919535a0397d711d+5622 + - class: Directory + location: keep:47ef8e39657d08ed0bcc9f713d8a2d99+4397 + - class: Directory + location: keep:b262cb419fc6849ab14b73dc16563665+6208 + - class: Directory + location: keep:841fa9252acd6e045e521a6b9997cb06+5409 + - class: Directory + location: keep:7a2417c467f2113fbdbcee7d250390c0+4775 + - class: Directory + location: keep:579960f6698c5044f589b2ea82c07825+5069 + - class: Directory + location: keep:791493d4b469db1fe0157bee6cc909a0+6883 + - class: Directory + location: keep:4622de7734552622979a93af0e5e7c23+5539 + - class: Directory + location: keep:58bb23119f496fa0214e201bce14f106+3842 + - class: Directory + location: keep:fa601c3e3fe58d25b2bfef9bf11dd9fe+5411 + - class: Directory + location: keep:54d53aee94ccfb5503f18760d3183ee0+3502 + - class: Directory + location: keep:40ba7b10f601ad7eaf6c2f4896b7b062+4400 + - class: Directory + location: 
keep:039eeb9ba43ba05810b63dc5ddbd7d1d+4275 + - class: Directory + location: keep:b6e40ed4a05c3e1ab46dcbc147a902ec+4522 + - class: Directory + location: keep:88f3098d7e4cbaad6c82be519a3c55ea+4860 + - class: Directory + location: keep:31894b9be9d683995b4dcc0e9bfbaa4e+5242 + - class: Directory + location: keep:f3b3d525f4558ec8d9ca5653a8effff1+5158 + - class: Directory + location: keep:48aa2624dabea1afa424e6ef56e415c3+4100 + - class: Directory + location: keep:26b9f40dca9076c986a9e640a6741ff6+5454 + - class: Directory + location: keep:07fa4aa5acc2b9406395219489021eb7+3926 + - class: Directory + location: keep:0e7f2aeee513cd9c60e2830fcd0521e7+4988 + - class: Directory + location: keep:000272da0e36f3ad965f91d3c785f223+4526 + - class: Directory + location: keep:a8603769d5908bf590b3e5e8e0d283fc+5370 + - class: Directory + location: keep:ee31c07013596533cf41497c49b98bea+5387 + - class: Directory + location: keep:db3182ec5412536e88b28913d0a22ada+5893 + - class: Directory + location: keep:c4aa350f6c1c24d39ec1e3f2ee03e319+10402 + - class: Directory + location: keep:3c49124b2fe87575c6fbbb22bd84bc8a+5770 + - class: Directory + location: keep:cedbeb74b2939d66673ea4e9034701c8+13980 + - class: Directory + location: keep:80930b1bbc50cfb710530be3c5a38084+9219 + - class: Directory + location: keep:801ef17053aa91406e853c3e5afec733+6864 + - class: Directory + location: keep:5a6f5b98c27b6449ec61f16ca20c6219+8838 + - class: Directory + location: keep:bf84aed2ad39d7a6a8709e4a786048d3+8879 + - class: Directory + location: keep:fb3f9ba3ae13454c4e3c50735bae1cf5+8624 + - class: Directory + location: keep:80316f42c52b5e1b69a3fb5e0c1f1ddc+5897 + - class: Directory + location: keep:fe389f368a539c800c26c0bc5fb742d7+4286 + - class: Directory + location: keep:0e308f382a914fa7c9206d9e4cc77921+3381 + - class: Directory + location: keep:3ddf70e354c0dd984375dabbf28fc59a+6354 + - class: Directory + location: keep:b5d10ce0c73cabfbac498fb78eecfc06+5133 + - class: Directory + location: 
keep:e8fa3dfbda584f7bf628c98210e834e7+6354 + - class: Directory + location: keep:0e7c7081e6134d0d6506e05aa2ec2156+6017 + - class: Directory + location: keep:40465d3bf5da56a3251d43157f72280e+6059 + - class: Directory + location: keep:14439d0abe42d802818b2217f0d6b2ab+6484 + - class: Directory + location: keep:badebc224945d0e9184dd0cb4cfae8e4+12086 + - class: Directory + location: keep:118dc90f74a9b2f5123b691469b188c3+10610 + - class: Directory + location: keep:460714ee63d216b0d3e31e7df4d56ef1+9133 + - class: Directory + location: keep:950f777c77cc3297193a1589f7369907+5645 + - class: Directory + location: keep:5e92c5473fb4082c766bc8e6a6d565ff+5723 + - class: Directory + location: keep:1a878e042a6589dcd2cbf7f5871ad59e+9518 + - class: Directory + location: keep:300d5cba22f4a4ea23070e754f3dcbe5+8414 + - class: Directory + location: keep:99c05547b54571271c0247c875273d5a+8127 + - class: Directory + location: keep:f99173d4ea915f8a555c8573af29db28+9095 + - class: Directory + location: keep:fa049a5fca2922b63f99d18f31f8e4e8+6330 + - class: Directory + location: keep:3fbe7fd8fa060ac932967413696b9c81+9447 + - class: Directory + location: keep:a26bd7a31ff75026d40a2da60ae419bd+6458 + - class: Directory + location: keep:000eab0395e66bc3713918779c23279f+8692 + - class: Directory + location: keep:c8f8a9e80405cbd66db3bae9e54f1a13+7720 + - class: Directory + location: keep:3da0c715c14e6aa7ca42c07b889282ea+5533 + - class: Directory + location: keep:20e265c6d928d52753cc63c9d7f78cb4+5558 + - class: Directory + location: keep:7825900466a2fd915996e85563d143c4+5236 + - class: Directory + location: keep:031b8fb70b62a432fcb64ff131615f7e+6270 + - class: Directory + location: keep:6fba515068ce39cfd380ab84a8944527+6020 + - class: Directory + location: keep:6d293abe59b879411bb9078d10fb56ae+6064 + - class: Directory + location: keep:ee1a1b528312c32a279f69a8e650fb66+5979 + - class: Directory + location: keep:b8f88a4c961f45ad8abbf69a94df2c18+6229 + - class: Directory + location: 
keep:c22ea1ed97e753c1c7c9c7f5cdc67b0a+6821 + - class: Directory + location: keep:5fe068b8da7169c44f268136e3149cb6+5603 + - class: Directory + location: keep:8e5e40c387c49513a08e94f295a32a62+5753 + - class: Directory + location: keep:3ee5a66df9c62c70f02562fcee7318a2+5964 + - class: Directory + location: keep:3f575d430e959a7c17511baf70fb2eeb+7598 + - class: Directory + location: keep:29663929ce1833893627e5cb60567054+5960 + - class: Directory + location: keep:6d04859c4ba2394a2a9168c7858292c9+8063 + - class: Directory + location: keep:a84b2c7921fb9388272b45d309fdd9e9+6273 + - class: Directory + location: keep:b418c733542f662e0a9e803b62869c99+6313 + - class: Directory + location: keep:019ab93b84966cc79ab08e33b27a80bd+6526 + - class: Directory + location: keep:d24e2eb9b20a2576513923571c64ccd8+6360 + - class: Directory + location: keep:5ad8f70496af453d89c14188521b4fc7+6610 + - class: Directory + location: keep:47721d4b0594ab33fcb506bdc3db0598+6385 + - class: Directory + location: keep:51a06e0f5729cb4c107f55d6c8529a63+5602 + - class: Directory + location: keep:f04b354ff7b9ca5d384f3a1a62cf71d9+5603 + - class: Directory + location: keep:fbe0344b41bb5d7e4cc1d2c3979bc928+5625 + - class: Directory + location: keep:f9c7197f69cecc0e61a64f1a4b0852e2+5770 + - class: Directory + location: keep:4aa25af9a26023744d7ea569b47c7410+6569 + - class: Directory + location: keep:080f588bf847f736afff2f9ed0ae2322+4755 + - class: Directory + location: keep:d3fd12f7113d5f8f98c022e35c39ca37+5465 + - class: Directory + location: keep:732ada30026c0cda98767d0de1cb471f+5599 + - class: Directory + location: keep:a5f5110ff8d02a65ddcdceadaa2d2e70+6101 + - class: Directory + location: keep:0343282034802ee2ce544e802326c94a+6904 + - class: Directory + location: keep:4cb76d00b852b59f2a8b02097ae681de+6821 + - class: Directory + location: keep:aa7389dd1210cce54963946ef62254c1+5343 + - class: Directory + location: keep:92945c6d1adaad79b9b49b6753c83c0a+6486 + - class: Directory + location: 
keep:000b0c2f2178a87122d7d4a457e078aa+7748 + - class: Directory + location: keep:28904a543f55f7b2cb74776a8ebbdd4e+6484 + - class: Directory + location: keep:3cc1a31b2091917d957e7bffe13fcd9a+6526 + - class: Directory + location: keep:d83e65bb86859184e5f9faf27f3faed1+8711 + - class: Directory + location: keep:93aaae32487a543c463d326b3fc97273+6695 + - class: Directory + location: keep:61b6750dcee20583e203995b834d038a+6738 + - class: Directory + location: keep:84ad1c8dd9a579552ba716ac55bf5358+5938 + - class: Directory + location: keep:c2f1f15d4979f594246d59988b388e90+5343 + - class: Directory + location: keep:40b236d5a6c08cd99449fd3d448cd34f+5219 + - class: Directory + location: keep:e99ad05c03b4ca78056def975357c33e+7357 + - class: Directory + location: keep:f33adc2512543b97b9ab83c8817b25b8+6382 + - class: Directory + location: keep:8f9965977d52671e7fb3f7b04c6688a0+6818 + - class: Directory + location: keep:eeb6989418ad1779ed2ed6bfce61b56d+6483 + - class: Directory + location: keep:50abc3635341fd2dc74cf1ac82717e86+6018 + - class: Directory + location: keep:179334d433abd2cf712a6ddf928e7dae+6062 + - class: Directory + location: keep:5256caf06d5d611dc89d6d45b19c961c+5851 + - class: Directory + location: keep:05a0664d1a252cd58bc73839c71ff3b5+6147 + - class: Directory + location: keep:3b1bb7557c5dca8ab4081701c93eab02+6733 + - class: Directory + location: keep:5200db4989f48b68ac91436dfa3b70e3+6019 + - class: Directory + location: keep:57f2909df7fe65a13d97a714dbac60e8+6438 + - class: Directory + location: keep:58c6974d0178c4e017ed194791e8e63d+7111 + - class: Directory + location: keep:27bd5b627a26014528b3b5a9e4f8973c+5597 + - class: Directory + location: keep:b82473f5704b7c15b1d43aaabadba8d6+6271 + - class: Directory + location: keep:cee144cd698a64b65a5439b7f423cfaf+6693 + - class: Directory + location: keep:eb01a678af872384129e2a9f081a668f+5345 + - class: Directory + location: keep:cda369ef18201bfdc7817ed1be13d0b6+6146 + - class: Directory + location: 
keep:bb302fccf71c54665b240e105e166424+4285 + - class: Directory + location: keep:7d27bc720f96d96e00695aee5cb96fc6+5937 + - class: Directory + location: keep:30ecf50e1c96904e64077038fef7345f+6314 + - class: Directory + location: keep:5027177bf4cd967718770a27e49aef7c+7792 + - class: Directory + location: keep:9c441a1c11d7dcca710b35d8e9b9f191+3039 + - class: Directory + location: keep:dcc047dfb8b5801d3057799576d26543+9427 + - class: Directory + location: keep:a47eb87943c0f6b431c30214720f5353+6736 + - class: Directory + location: keep:98f7b440486b2bd467a6fa244e5a1b21+9336 + - class: Directory + location: keep:5eac8f5f4f204f3717af3ffe18499417+7576 + - class: Directory + location: keep:5fa71e545b03d751f43efa83ae256177+6821 + - class: Directory + location: keep:9b70322df1f1c512f29482e65c3023e1+5815 + - class: Directory + location: keep:5eec0be3c3953751bce308157b368761+7492 + - class: Directory + location: keep:d9bba0ab3740c8b3ce1e12cf1368ec74+6820 + - class: Directory + location: keep:a252d847cf48aaf8bc5838440c884b9b+8293 + - class: Directory + location: keep:ea4916d4a77bf641917403d0d27ff5f1+8126 + - class: Directory + location: keep:18354a5104483120d3df163d41749e00+7620 + - class: Directory + location: keep:879c142801965bcf44ae655f5882fd02+8952 + - class: Directory + location: keep:576bcb8f7e6f9d2d60030c2478757f3c+6126 + - class: Directory + location: keep:761ea5d6cd141f044b986926e6d012d2+7453 + - class: Directory + location: keep:d37bb67c57dbc5588724c0cc66fc639d+10018 + - class: Directory + location: keep:a815b0140c1aadeb1066bd7961fcea3e+5935 diff --git a/cwl/preprocess/vcfbed/get-vcfbed.cwl b/cwl/preprocess/vcfbed/get-vcfbed.cwl new file mode 100644 index 0000000000..e7f090baf5 --- /dev/null +++ b/cwl/preprocess/vcfbed/get-vcfbed.cwl @@ -0,0 +1,62 @@ +# Copyright (C) The Lightning Authors. All rights reserved. 
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: deep_listing
+cwlVersion: v1.0
+class: ExpressionTool
+label: Scatter over directory to pair VCF, BED and index files
+inputs:
+  vcfsdir:
+    type: Directory
+    label: Directory containing compressed VCF, BED, and index files for processing
+  bedfile:
+    type: File?
+    label: Optional BED to scatter over if not included in vcfsdir
+outputs:
+  vcfs:
+    type: File[]
+    label: Array of compressed VCF files from input directory
+    secondaryFiles: [.tbi]
+  beds:
+    type: File[]
+    label: Array of BED files from input directory
+  outnames:
+    type: string[]
+    label: Array of file names to maintain naming convention for gVCF conversion
+# The three outputs are parallel arrays: index i of vcfs, beds and outnames describes the same ".gz" entry of vcfsdir.
+expression: |
+  ${
+    var vcfs = [], beds = [], outnames = [];
+    var listing = inputs.vcfsdir.listing;
+    for (var i = 0; i < listing.length; i++) {
+      var main = listing[i];
+      if (main.nameext != '.gz') { continue; }
+      var baseName = main.nameroot.split(".")[0];
+      // A supplied bedfile wins over per-sample BEDs. bed is reset for every VCF so that a BED
+      // found for an earlier VCF can never be silently paired with a later one.
+      var bed = inputs.bedfile ? inputs.bedfile : null;
+      for (var j = 0; j < listing.length; j++) {
+        var other = listing[j];
+        if (other.basename == baseName+".tbi" || other.basename == main.basename+".tbi") {
+          main.secondaryFiles = [other];
+        } else if (bed === null && other.basename == baseName+".bed") {
+          bed = other;
+        }
+      }
+      if (bed === null) {
+        throw new Error("no BED found for " + main.basename + " and no bedfile input given");
+      }
+      vcfs.push(main);
+      beds.push(bed);
+      outnames.push(baseName+'.vcf.gz');
+    }
+    return {"vcfs": vcfs, "beds": beds, "outnames": outnames};
+  }
diff --git a/cwl/preprocess/vcfbed/intersect-vcfbed.cwl b/cwl/preprocess/vcfbed/intersect-vcfbed.cwl
new file mode 100644
index 0000000000..b7e35013a2
--- /dev/null
+++ b/cwl/preprocess/vcfbed/intersect-vcfbed.cwl
@@ -0,0 +1,52 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Intersect VCF and BED
+requirements:
+  - class: ShellCommandRequirement  # needed for the unquoted "|", ">" and "&&" arguments below
+  - class: DockerRequirement
+    dockerPull: l7g/preprocess-vcfbed
+  - class: ResourceRequirement
+    ramMin: 12000
+inputs:
+  vcf:
+    type: File
+    label: VCF to be intersected
+  bed:
+    type: File
+    label: BED to intersect with VCF
+outputs:
+  intersectedvcf:
+    type: File
+    label: Intersected VCF with 100% alignment
+    outputBinding:
+      glob: "*.vcf.gz"  # NOTE(review): only matches if the input VCF's name ends in .vcf.gz, since the output reuses that name
+    secondaryFiles: [.tbi]  # index written by the tabix call at the end of the command
+baseCommand: [bedtools, intersect]
+arguments:  # builds: bedtools intersect -header -a vcf -b bed -f 1 | bgzip -c > NAME && tabix -p vcf NAME
+  - "-header"
+  - prefix: "-a"
+    valueFrom: $(inputs.vcf)
+  - prefix: "-b"
+    valueFrom: $(inputs.bed)
+  - prefix: "-f"  # minimum overlap as a fraction of each -a record; 1 is the "100%" in the output label
+    valueFrom: "1"
+  - shellQuote: false
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: false
+    valueFrom: ">"
+  - $(inputs.vcf.basename)  # output is written to the working directory under the input's file name
+  - shellQuote: false
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.vcf.basename)
diff --git a/cwl/preprocess/vcfbed/sort-bed.cwl b/cwl/preprocess/vcfbed/sort-bed.cwl
new file mode 100644
index 0000000000..c2ad861ac1
--- /dev/null
+++ b/cwl/preprocess/vcfbed/sort-bed.cwl
@@ -0,0 +1,32 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Sort BED by natural ordering (1,2,10,M,X)
+requirements:
+  - class: ShellCommandRequirement  # needed for the unquoted ">" argument below
+  - class: DockerRequirement
+    dockerPull: l7g/preprocess-vcfbed
+inputs:
+  bed:
+    type: File
+    label: BED to be sorted by natural ordering
+outputs:
+  sortedbed:
+    type: File
+    label: BED sorted by natural ordering
+    outputBinding:
+      glob: "*.bed"  # NOTE(review): only matches if the input BED's name ends in .bed, since the output reuses that name
+baseCommand: sort
+arguments:  # builds: sort -k1,1V -k2,2n bed > NAME
+  - "-k1,1V"  # first key: column 1, version ("natural") order
+  - "-k2,2n"  # second key: column 2, numeric
+  - $(inputs.bed)
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.bed.basename)
\ No newline at end of file
diff --git a/cwl/preprocess/vcfbed/sort-vcf.cwl b/cwl/preprocess/vcfbed/sort-vcf.cwl
new file mode 100644
index 0000000000..6050c8fe4e
--- /dev/null
+++ b/cwl/preprocess/vcfbed/sort-vcf.cwl
@@ -0,0 +1,42 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+cwlVersion: v1.0
+class: CommandLineTool
+label: Sort VCF by natural ordering (1,2,10,M,X)
+requirements:
+  - class: ShellCommandRequirement  # needed for the unquoted "|", ">" and "&&" arguments below
+  - class: DockerRequirement
+    dockerPull: l7g/preprocess-vcfbed
+inputs:
+  vcf:
+    type: File
+    label: Compressed VCF to be sorted by natural ordering
+outputs:
+  sortedvcf:
+    type: File
+    label: Compressed VCF sorted by natural ordering
+    outputBinding:
+      glob: "*.vcf.gz"
+    secondaryFiles: [.tbi]  # index written by the tabix call at the end of the command
+baseCommand: vcf-sort
+arguments:  # builds: vcf-sort -c vcf | bgzip -c > NAME && tabix -p vcf NAME
+  - prefix: "-c"
+    valueFrom: $(inputs.vcf)
+  - shellQuote: False
+    valueFrom: "|"
+  - "bgzip"
+  - "-c"
+  - shellQuote: False
+    valueFrom: ">"
+  - $(inputs.vcf.basename)  # output is written to the working directory under the input's file name
+  - shellQuote: False
+    valueFrom: "&&"
+  - "tabix"
+  - prefix: "-p"
+    valueFrom: "vcf"
+  - $(inputs.vcf.basename)
\ No newline at end of file
diff --git a/cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf
b/cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf
new file mode 100755
index 0000000000..0a0716d8df
--- /dev/null
+++ b/cwl/preprocess/vcfbed/src/convert-vcf-bed-to-gvcf
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+#
+# Convert a VCF and a BED of called regions into a bgzip'd, tabix-indexed gVCF.
+#
+# Usage: convert-vcf-bed-to-gvcf <in.vcf.gz> <in.bed> <ref> [out.vcf.gz]
+#   The output name defaults to "out.vcf.gz" in the current directory.
+#
+# vcfbed2homref is expected to be available
+# The VCF file is expected to be indexed and gzip'd
+# The BED file is expected to be uncompressed
+# The reference file is expected to be compressed and indexed
+#
+# Exits non-zero when a required argument is missing or when conversion,
+# compression or indexing fails.
+
+# Make the pipeline below report a vcfbed2homref failure, not only bgzip's status.
+set -o pipefail
+
+VERBOSE=1
+
+invcf="$1"
+inbed="$2"
+inref="$3"
+outvcf="$4"
+
+if [[ "$invcf" == "" ]] || [[ "$inbed" == "" ]] || [[ "$inref" == "" ]] ; then
+  echo "provide input vcf, bed and reference file" >&2
+  exit 1
+fi
+
+if [[ "$outvcf" == "" ]] ; then
+  outvcf="out.vcf.gz"
+fi
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "vcfbed2homref -r \"$inref\" -b \"$inbed\" \"$invcf\" | bgzip -c > \"$outvcf\""
+fi
+
+vcfbed2homref -r "$inref" -b "$inbed" "$invcf" | \
+  bgzip -c > "$outvcf"
+r=$?
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "### finished conversion, exit code $r"
+fi
+
+# Do not index, and do not report success for, a failed or truncated conversion.
+if [[ "$r" -ne 0 ]] ; then
+  echo "### conversion failed, not indexing $outvcf" >&2
+  exit "$r"
+fi
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "### indexing $outvcf"
+fi
+
+# Propagate tabix's status; otherwise the trailing echo would mask it.
+tabix -f "$outvcf" || exit $?
+
+if [[ "$VERBOSE" -eq 1 ]] ; then
+  echo "### done"
+fi
diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl
new file mode 100644
index 0000000000..21c0c070ab
--- /dev/null
+++ b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf-lite.cwl
@@ -0,0 +1,49 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +cwlVersion: v1.0 +class: Workflow +label: Preprocess VCF and BED files to create a collection of gVCF files +requirements: + DockerRequirement: + dockerPull: l7g/preprocess-vcfbed + ScatterFeatureRequirement: {} + +inputs: + vcfsdir: + type: Directory + label: Directory of VCF, BED and index files + ref: + type: File + label: Reference FASTA file + bedfile: + type: File? + label: Optional BED to scatter over if not included in vcfsdir + +outputs: + result: + type: File[] + label: gVCFs and index files + outputSource: vcfbed2gvcf/result + +steps: + get-vcfbed: + run: get-vcfbed.cwl + in: + vcfsdir: vcfsdir + bedfile: bedfile + out: [vcfs, beds, outnames] + vcfbed2gvcf: + run: vcfbed2gvcf.cwl + scatter: [vcf, bed, outname] + scatterMethod: dotproduct + in: + vcf: get-vcfbed/vcfs + bed: get-vcfbed/beds + ref: ref + outname: get-vcfbed/outnames + out: [result] diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl new file mode 100644 index 0000000000..974a39abbd --- /dev/null +++ b/cwl/preprocess/vcfbed/vcfbed2gvcf-wf.cwl @@ -0,0 +1,72 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +$namespaces: + arv: "http://arvados.org/cwl#" + cwltool: "http://commonwl.org/cwltool#" +cwlVersion: v1.0 +class: Workflow +label: Preprocess VCF and BED files to create a collection of gVCF files +requirements: + - class: DockerRequirement + dockerPull: l7g/preprocess-vcfbed + - class: ResourceRequirement + coresMin: 1 + ramMin: 12000 + - class: ScatterFeatureRequirement + +inputs: + vcfsdir: + type: Directory + label: Directory of VCF, BED and index files + ref: + type: File + label: Reference FASTA file + bedfile: + type: File? 
+    label: Optional BED to scatter over if not included in vcfsdir
+
+outputs:
+  result:
+    type: File[]
+    label: gVCFs and index files
+    outputSource: vcfbed2gvcf/result
+
+steps:
+  get-vcfbed:  # collects the VCFs, BEDs and output names to scatter over
+    run: get-vcfbed.cwl
+    in:
+      vcfsdir: vcfsdir
+      bedfile: bedfile
+    out: [vcfs, beds, outnames]
+  sort-vcf:  # one run per VCF
+    run: sort-vcf.cwl
+    scatter: vcf
+    in:
+      vcf: get-vcfbed/vcfs
+    out: [sortedvcf]
+  sort-bed:  # one run per BED
+    run: sort-bed.cwl
+    scatter: bed
+    in:
+      bed: get-vcfbed/beds
+    out: [sortedbed]
+  intersect-vcfbed:  # pairs the i-th sorted VCF with the i-th sorted BED
+    run: intersect-vcfbed.cwl
+    scatter: [vcf, bed]
+    scatterMethod: dotproduct
+    in:
+      vcf: sort-vcf/sortedvcf
+      bed: sort-bed/sortedbed
+    out: [intersectedvcf]
+  vcfbed2gvcf:  # i-th intersected VCF + sorted BED + output name -> gVCF
+    run: vcfbed2gvcf.cwl
+    scatter: [vcf, bed, outname]
+    scatterMethod: dotproduct
+    in:
+      vcf: intersect-vcfbed/intersectedvcf
+      bed: sort-bed/sortedbed
+      ref: ref
+      outname: get-vcfbed/outnames
+    out: [result]
diff --git a/cwl/preprocess/vcfbed/vcfbed2gvcf.cwl b/cwl/preprocess/vcfbed/vcfbed2gvcf.cwl
new file mode 100644
index 0000000000..ae07886379
--- /dev/null
+++ b/cwl/preprocess/vcfbed/vcfbed2gvcf.cwl
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+$namespaces:
+  cwltool: 'http://commonwl.org/cwltool#'
+  arv: 'http://arvados.org/cwl#'
+class: CommandLineTool
+cwlVersion: v1.0
+label: Creates gVCF with a given VCF, BED and reference FASTA
+requirements:
+  ResourceRequirement:
+    coresMin: 1
+  DockerRequirement:
+    dockerPull: l7g/preprocess-vcfbed
+baseCommand: bash  # command line: bash <script> <vcf> <bed> <ref> <outname>
+inputs:
+  script:
+    label: Script to run vcfbed2homref, compress and index VCF
+    type: File
+    inputBinding:
+      position: 1
+    default:
+      location: src/convert-vcf-bed-to-gvcf
+      class: File
+  vcf:
+    label: VCF to be converted to gVCF
+    type: File
+    secondaryFiles:
+      - .tbi
+    inputBinding:
+      position: 2
+  bed:
+    label: BED representing called region of VCF
+    type: File
+    inputBinding:
+      position: 3
+  ref:
+    label: Compressed FASTA reference
+    type: File
+    inputBinding:
+      position: 4
+  outname:
+    label: String to maintain VCF naming convention for gVCF
+    type: string
+    inputBinding:
+      position: 5
+outputs:
+  result:
+    label: Compressed gVCF and index file
+    type: File
+    secondaryFiles:
+      - .tbi
+    outputBinding:
+      glob: '*.vcf.gz'
diff --git a/cwl/preprocess/vcfbed/yml/single-bed.yml b/cwl/preprocess/vcfbed/yml/single-bed.yml
new file mode 100644
index 0000000000..23966c10e7
--- /dev/null
+++ b/cwl/preprocess/vcfbed/yml/single-bed.yml
@@ -0,0 +1,13 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:  # presumably the vcfbed2gvcf workflows' `vcfsdir` input -- confirm
+  class: Directory
+  path: keep:890e2842e843ee611c82ffeb033b5f4c+385
+bedfile:  # a single BED file given separately from vcfsdir
+  class: File
+  path: keep:a33dcf43e857a96eef48d93a76615285+140/hu01F73B_masterVarBeta-GS000037833-ASM.bed
+ref:  # gzip-compressed FASTA, judging by the .fasta.gz name
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml
new file mode 100644
index 0000000000..7a7eb569e5
--- /dev/null
+++ b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-1kcgivcfbed.yml
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:  # no bedfile key here; BEDs presumably live inside vcfsdir -- confirm
+  class: Directory
+  path: keep:e3daea88dee2bc69f5ba24f0089c7387+180598
+ref:  # same reference file as the other inputs in this directory
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml
new file mode 100644
index 0000000000..bcd25fc616
--- /dev/null
+++ b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi-5samples.yml
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:  # presumably a five-sample subset, going by the file name -- confirm
+  class: Directory
+  path: keep:237e82da5f78bd5d8e2640547c984bec+1698
+ref:  # same reference file as the other inputs in this directory
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml
new file mode 100644
index 0000000000..10c7a24438
--- /dev/null
+++ b/cwl/preprocess/vcfbed/yml/vcfbed2gvcf-wf-cgi.yml
@@ -0,0 +1,10 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+vcfsdir:
+  class: Directory
+  path: keep:a216d5da3c7ad73b0242e57678f48f56+58474
+ref:
+  class: File
+  path: keep:5858d764a2e7ef4dfbb2ca3490b3e3c1+2334/chrM_37/g1k/human_g1k_v37.fasta.gz
diff --git a/docker/beagle5.4/Dockerfile b/docker/beagle5.4/Dockerfile
new file mode 100644
index 0000000000..11656280a3
--- /dev/null
+++ b/docker/beagle5.4/Dockerfile
@@ -0,0 +1,65 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Image with the Beagle 5.4 and bref3 jars (downloaded into the image's
+# initial working directory) plus htslib, samtools and bcftools 1.9.
+
+FROM ubuntu:16.04
+LABEL maintainer="Jiayong Li"
+USER root
+
+# Build dependencies. "update" and "install" share one RUN so that a cached
+# package index is never reused after the package list changes.
+RUN apt-get update --fix-missing -qy && \
+    apt-get install -qy \
+        wget \
+        build-essential \
+        cmake \
+        zlib1g-dev \
+        libbz2-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        gcc \
+        make \
+        python \
+        git
+
+#Installing OpenJDK-8
+RUN apt-get update && \
+    apt-get install -y openjdk-8-jdk && \
+    apt-get install -y ant && \
+    apt-get clean
+
+#Fixing certificate issues
+# -y keeps apt from stopping at a confirmation prompt during the build.
+RUN apt-get update && \
+    apt-get install -y ca-certificates-java && \
+    apt-get clean && \
+    update-ca-certificates -f
+
+# Getting beagle5.4
+RUN wget https://faculty.washington.edu/browning/beagle/beagle.05May22.33a.jar
+RUN wget https://faculty.washington.edu/browning/beagle/bref3.05May22.33a.jar
+
+# Getting HTSLIB 1.9 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar -xjvf htslib-1.9.tar.bz2 && \
+    cd htslib-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.9
+
+RUN wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xjvf samtools-1.9.tar.bz2 && \
+    cd samtools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.9
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xjvf bcftools-1.9.tar.bz2 && \
+    cd bcftools-1.9 && ./configure && make && make install
+
+WORKDIR /
diff --git a/docker/cgivar2vcfbed/Dockerfile
b/docker/cgivar2vcfbed/Dockerfile
new file mode 100644
index 0000000000..eb8ee227ab
--- /dev/null
+++ b/docker/cgivar2vcfbed/Dockerfile
@@ -0,0 +1,46 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Image adding cgatools 1.8.0, htslib 1.3.1 (tabix/bgzip), bedtools 2.25.0
+# and a checkout of gvcf_regions on top of arvados/jobs.
+
+# NOTE(review): the base image is untagged, so builds track whatever
+# arvados/jobs currently resolves to -- consider pinning a release tag.
+FROM arvados/jobs
+LABEL maintainer="Jiayong Li"
+
+USER root
+
+# "update" and "install" share one RUN so a cached package index is never
+# reused after the package list changes.
+RUN apt-get update -q && \
+    apt-get install -qy build-essential wget cmake zlib1g-dev git
+
+# Installing cgatools 1.8.0
+
+RUN wget https://sourceforge.net/projects/cgatools/files/1.8.0/cgatools-1.8.0.1-linux_binary-x86_64.tar.gz && \
+    tar -xzvf cgatools-1.8.0.1-linux_binary-x86_64.tar.gz && \
+    cp cgatools-1.8.0.1-linux_binary-x86_64/bin/cgatools /usr/local/bin
+
+WORKDIR /
+
+# Getting HTSLIB for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.3.1/htslib-1.3.1.tar.bz2 && \
+    tar -xjvf htslib-1.3.1.tar.bz2 && \
+    cd htslib-1.3.1 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bedtools v2.25.0 and dependencies
+
+RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.25.0/bedtools-2.25.0.tar.gz && \
+    tar -xzvf bedtools-2.25.0.tar.gz && \
+    cd bedtools2/ && make && cp /bedtools2/bin/bedtools /usr/local/bin
+
+WORKDIR /
+
+# Installing gvcf_regions
+
+RUN git clone https://github.com/lijiayong/gvcf_regions
diff --git a/docker/lightning/Dockerfile b/docker/lightning/Dockerfile
new file mode 100644
index 0000000000..d92060264f
--- /dev/null
+++ b/docker/lightning/Dockerfile
@@ -0,0 +1,47 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM ubuntu:20.04
+LABEL maintainer="Jiayong Li"
+USER root
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install necessary dependencies (python3.8 is the python3 of Ubuntu 20.04)
+
+RUN apt-get update --fix-missing -qy && \
+    apt-get install -qy --no-install-recommends \
+        wget \
+        build-essential \
+        cmake \
+        zlib1g-dev \
+        libbz2-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        gcc \
+        make \
+        python3.8 \
+        python3-pip \
+        python3-numpy \
+        python3-pandas \
+        python3-scipy \
+        python3-matplotlib \
+        git
+
+# "sklearn" is a deprecated PyPI alias; the real package is scikit-learn
+RUN pip3 install --no-cache-dir --upgrade scikit-learn scipy
+
+# Installing go 1.19
+
+RUN wget https://go.dev/dl/go1.19.3.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go1.19.3.linux-amd64.tar.gz
+
+ENV PATH=$PATH:/usr/local/go/bin:/root/go/bin
+
+# Getting lightning
+
+RUN git clone https://github.com/arvados/lightning && \
+    cd lightning && make
+
+WORKDIR /
diff --git a/docker/snpeff/Dockerfile b/docker/snpeff/Dockerfile
new file mode 100644
index 0000000000..e37c3d4cf7
--- /dev/null
+++ b/docker/snpeff/Dockerfile
@@ -0,0 +1,55 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM ubuntu:18.04
+LABEL maintainer="Jiayong Li"
+USER root
+
+# Install necessary dependencies
+# (update and install share one RUN so a cached package index is never reused)
+RUN apt-get update --fix-missing -qy && \
+    apt-get install -qy \
+        wget \
+        build-essential \
+        cmake \
+        zlib1g-dev \
+        libbz2-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        gcc \
+        make \
+        python \
+        python3 \
+        git \
+        default-jdk \
+        unzip
+
+# Getting HTSLIB 1.10.2 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.10.2/htslib-1.10.2.tar.bz2 && tar -xjvf htslib-1.10.2.tar.bz2 && \
+    cd htslib-1.10.2 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.10
+
+RUN wget https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 && tar -xjvf samtools-1.10.tar.bz2 && \
+    cd samtools-1.10 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.10.2
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 && tar -xjvf bcftools-1.10.2.tar.bz2 && \
+    cd bcftools-1.10.2 && ./configure && make && make install
+
+WORKDIR /
+
+# Getting snpEff4.3t
+# (fetched over https rather than plain http)
+RUN wget https://sourceforge.net/projects/snpeff/files/snpEff_v4_3t_core.zip && \
+    unzip snpEff_v4_3t_core.zip
+
+WORKDIR /
diff --git a/docker/vcfutil/Dockerfile b/docker/vcfutil/Dockerfile
new file mode 100644
index 0000000000..d3427b74ec
--- /dev/null
+++ b/docker/vcfutil/Dockerfile
@@ -0,0 +1,66 @@
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+FROM arvados/jobs
+LABEL maintainer="Jiayong Li"
+# NOTE(review): base image is untagged; consider pinning an arvados/jobs release.
+USER root
+
+# One RUN for update+install keeps the package index fresh; unzip is for rtg-tools
+RUN apt-get update -q && \
+    apt-get install -qy build-essential wget cmake zlib1g-dev \
+        libbz2-dev liblzma-dev libncurses5-dev libncursesw5-dev git vcftools unzip
+
+# Getting HTSLIB 1.9 for tabix/bgzip
+
+RUN wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar -xjvf htslib-1.9.tar.bz2 && \
+    cd htslib-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing samtools 1.9
+
+RUN wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xjvf samtools-1.9.tar.bz2 && \
+    cd samtools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing bcftools 1.9
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xjvf bcftools-1.9.tar.bz2 && \
+    cd bcftools-1.9 && ./configure && make && make install
+
+WORKDIR /
+
+# Installing rtg tools v3.9.1
+# unzip comes from the package install above.
+# NOTE(review): "./rtg help" presumably completes rtg's first-run setup -- confirm.
+
+RUN wget https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.9.1/rtg-tools-3.9.1-linux-x64.zip && \
+    unzip rtg-tools-3.9.1-linux-x64.zip && \
+    cd rtg-tools-3.9.1 && ./rtg help
+
+ENV PATH="/rtg-tools-3.9.1:${PATH}"
+
+WORKDIR /
+
+# Installing bedops v2.4.35
+
+RUN wget https://github.com/bedops/bedops/releases/download/v2.4.35/bedops_linux_x86_64-v2.4.35.tar.bz2 && \
+    tar jxvf bedops_linux_x86_64-v2.4.35.tar.bz2 && \
+    cp bin/* /usr/local/bin
+
+WORKDIR /
+
+# Installing bedtools 2.27.1
+
+RUN wget https://github.com/arq5x/bedtools2/releases/download/v2.27.1/bedtools-2.27.1.tar.gz && \
+    tar -xzvf bedtools-2.27.1.tar.gz && \
+    cd bedtools2 && make && cp bin/* /usr/local/bin
+
+WORKDIR /
+
+# Installing gvcf_regions
+
+RUN git clone https://github.com/lijiayong/gvcf_regions
-- 
2.30.2