refs #20821
author swz <swz@curii.com>
Wed, 11 Oct 2023 17:08:20 +0000 (17:08 +0000)
committer swz <swz@curii.com>
Wed, 11 Oct 2023 17:08:20 +0000 (17:08 +0000)
Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek <swz@curii.com>

.licenseignore
cwl/lightning/fasta2numpy-test-release-wf.cwl
cwl/lightning/figures/fasta2numpy.dot [new file with mode: 0644]
cwl/lightning/lightning-import.cwl
docker/lightning/Dockerfile

index 0d77b26cc448356494453a417b0bbfa4c6d3531e..2274f17c684bb93f38d7b16c7f8fd921877fce9e 100644 (file)
@@ -4,3 +4,4 @@ go.mod
 go.sum
 testdata/*
 readme.md
+cwl/lightning/figures/*
index 7b860ccc2b76a3fda4cbb2129a860243233e44dc..dbf8d8632bc46d645915158bb1c7ed60d23caffb 100644 (file)
@@ -56,6 +56,15 @@ inputs:
   readmeinfo: string[]
 
 outputs: 
+  stagednpydir:
+    type: Directory
+    outputSource: stage-output/stagednpydir
+  stagedonehotnpydir:
+    type: Directory
+    outputSource: stage-output/stagedonehotnpydir
+  stagedannotationdir:
+    type: Directory
+    outputSource: stage-output/stagedannotationdir
   readme:
     type: File
     outputSource: genreadme/readme
@@ -110,6 +119,29 @@ steps:
       randomseed: randomseed
     out: [samplescsv]
 
+  lightning-slice-numpy:
+    run: lightning-slice-numpy.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      threads: threads
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      samplescsv: lightning-choose-samples/samplescsv
+    out: [outdir, npys, chunktagoffsetcsv]
+
+  lightning-slice-numpy-onehot_logisticregression:
+    run: lightning-slice-numpy-onehot.cwl
+    in:
+      matchgenome: matchgenome
+      libdir: lightning-slice/libdir
+      regions: regions
+      mergeoutput: mergeoutput
+      expandregions: expandregions
+      samplescsv: lightning-slice-numpy-pca/pcasamplescsv
+    out: [outdir, npys]
+
   lightning-slice-numpy-pca:
     run: lightning-slice-numpy-pca.cwl
     in:
@@ -122,8 +154,8 @@ steps:
       samplescsv: lightning-choose-samples/samplescsv
       pcacomponents: pcacomponents
     out: [outdir, pcanpy, pcasamplescsv]
-
-  lightning-slice-numpy:
+  
+  lightning-slice-numpy-onehot:
     run: lightning-slice-numpy-onehot.cwl
     in:
       matchgenome: matchgenome
@@ -131,7 +163,7 @@ steps:
       regions: regions
       mergeoutput: mergeoutput
       expandregions: expandregions
-      samplescsv: lightning-slice-numpy-pca/pcasamplescsv
+      samplescsv: lightning-choose-samples/samplescsv
     out: [outdir, npys]
 
   lightning-plot_1-2:
@@ -161,7 +193,7 @@ steps:
   lightning-anno2vcf-onehot:
     run: lightning-anno2vcf.cwl
     in:
-      annodir: lightning-slice-numpy/outdir
+      annodir: lightning-slice-numpy-onehot/outdir
     out: [vcfdir]
 
   make-libname:
@@ -183,6 +215,22 @@ steps:
       gnomaddir: gnomaddir
     out: [annotatedvcf, summary]
 
+  stage-output:
+    run: stage-output.cwl
+    in:
+      libname: make-libname/libname
+      npyfiles:
+        source: [lightning-slice-numpy/npys, lightning-slice-numpy/chunktagoffsetcsv]
+        linkMerge: merge_flattened
+      onehotnpyfiles: lightning-slice-numpy-onehot/npys
+      pcapngs:
+        source: [lightning-plot_1-2/png, lightning-plot_2-3/png]
+        linkMerge: merge_flattened
+      bed: lightning-tiling-stats/bed
+      annotatedvcf: annotate-wf/annotatedvcf
+      summary: annotate-wf/summary
+    out: [stagednpydir, stagedonehotnpydir, stagedannotationdir]
+
   genreadme:
     run: genreadme.cwl
     in:
diff --git a/cwl/lightning/figures/fasta2numpy.dot b/cwl/lightning/figures/fasta2numpy.dot
new file mode 100644 (file)
index 0000000..4f2bae2
--- /dev/null
@@ -0,0 +1,193 @@
+digraph G {
+bgcolor="#eeeeee";
+clusterrank=local;
+labeljust=right;
+labelloc=bottom;
+"lightning-anno2vcf-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-anno2vcf-onehot", shape=record, style=filled];
+"annotate-wf" [fillcolor="#F3CEA1", label="annotate-wf", shape=record, style=filled];
+"lightning-anno2vcf-onehot" -> "annotate-wf";
+"make-libname" [fillcolor=lightgoldenrodyellow, label="make-libname", shape=record, style=filled];
+"annotate-wf" [fillcolor="#F3CEA1", label="annotate-wf", shape=record, style=filled];
+"make-libname" -> "annotate-wf";
+"lightning-choose-samples" [fillcolor=lightgoldenrodyellow, label="lightning-choose-samples", shape=record, style=filled];
+"genreadme" [fillcolor=lightgoldenrodyellow, label=genreadme, shape=record, style=filled];
+"lightning-choose-samples" -> "genreadme";
+"lightning-slice-numpy-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot", shape=record, style=filled];
+"lightning-anno2vcf-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-anno2vcf-onehot", shape=record, style=filled];
+"lightning-slice-numpy-onehot" -> "lightning-anno2vcf-onehot";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-choose-samples" [fillcolor=lightgoldenrodyellow, label="lightning-choose-samples", shape=record, style=filled];
+"lightning-slice" -> "lightning-choose-samples";
+"batch-dirs" [fillcolor=lightgoldenrodyellow, label="batch-dirs", shape=record, style=filled];
+"lightning-import_data" [fillcolor=lightgoldenrodyellow, label="lightning-import_data", shape=record, style=filled];
+"batch-dirs" -> "lightning-import_data";
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-plot_1-2" [fillcolor=lightgoldenrodyellow, label="lightning-plot_1-2", shape=record, style=filled];
+"lightning-slice-numpy-pca" -> "lightning-plot_1-2";
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-plot_1-2" [fillcolor=lightgoldenrodyellow, label="lightning-plot_1-2", shape=record, style=filled];
+"lightning-slice-numpy-pca" -> "lightning-plot_1-2";
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-plot_2-3" [fillcolor=lightgoldenrodyellow, label="lightning-plot_2-3", shape=record, style=filled];
+"lightning-slice-numpy-pca" -> "lightning-plot_2-3";
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-plot_2-3" [fillcolor=lightgoldenrodyellow, label="lightning-plot_2-3", shape=record, style=filled];
+"lightning-slice-numpy-pca" -> "lightning-plot_2-3";
+"lightning-import_data" [fillcolor=lightgoldenrodyellow, label="lightning-import_data", shape=record, style=filled];
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-import_data" -> "lightning-slice";
+"lightning-import_refs" [fillcolor=lightgoldenrodyellow, label="lightning-import_refs", shape=record, style=filled];
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-import_refs" -> "lightning-slice";
+"lightning-choose-samples" [fillcolor=lightgoldenrodyellow, label="lightning-choose-samples", shape=record, style=filled];
+"lightning-slice-numpy" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy", shape=record, style=filled];
+"lightning-choose-samples" -> "lightning-slice-numpy";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-slice-numpy" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy", shape=record, style=filled];
+"lightning-slice" -> "lightning-slice-numpy";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-slice-numpy-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot", shape=record, style=filled];
+"lightning-slice" -> "lightning-slice-numpy-onehot";
+"lightning-choose-samples" [fillcolor=lightgoldenrodyellow, label="lightning-choose-samples", shape=record, style=filled];
+"lightning-slice-numpy-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot", shape=record, style=filled];
+"lightning-choose-samples" -> "lightning-slice-numpy-onehot";
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-slice-numpy-onehot_logisticregression" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot_logisticregression", shape=record, style=filled];
+"lightning-slice-numpy-pca" -> "lightning-slice-numpy-onehot_logisticregression";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-slice-numpy-onehot_logisticregression" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot_logisticregression", shape=record, style=filled];
+"lightning-slice" -> "lightning-slice-numpy-onehot_logisticregression";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-slice" -> "lightning-slice-numpy-pca";
+"lightning-choose-samples" [fillcolor=lightgoldenrodyellow, label="lightning-choose-samples", shape=record, style=filled];
+"lightning-slice-numpy-pca" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-pca", shape=record, style=filled];
+"lightning-choose-samples" -> "lightning-slice-numpy-pca";
+"lightning-slice" [fillcolor=lightgoldenrodyellow, label="lightning-slice", shape=record, style=filled];
+"lightning-tiling-stats" [fillcolor=lightgoldenrodyellow, label="lightning-tiling-stats", shape=record, style=filled];
+"lightning-slice" -> "lightning-tiling-stats";
+"lightning-slice-numpy" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-slice-numpy" -> "stage-output";
+"lightning-slice-numpy-onehot" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy-onehot", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-slice-numpy-onehot" -> "stage-output";
+"lightning-plot_1-2" [fillcolor=lightgoldenrodyellow, label="lightning-plot_1-2", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-plot_1-2" -> "stage-output";
+"lightning-plot_2-3" [fillcolor=lightgoldenrodyellow, label="lightning-plot_2-3", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-plot_2-3" -> "stage-output";
+"annotate-wf" [fillcolor="#F3CEA1", label="annotate-wf", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"annotate-wf" -> "stage-output";
+"lightning-tiling-stats" [fillcolor=lightgoldenrodyellow, label="lightning-tiling-stats", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-tiling-stats" -> "stage-output";
+"make-libname" [fillcolor=lightgoldenrodyellow, label="make-libname", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"make-libname" -> "stage-output";
+"lightning-slice-numpy" [fillcolor=lightgoldenrodyellow, label="lightning-slice-numpy", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"lightning-slice-numpy" -> "stage-output";
+"annotate-wf" [fillcolor="#F3CEA1", label="annotate-wf", shape=record, style=filled];
+"stage-output" [fillcolor=lightgoldenrodyellow, label="stage-output", shape=record, style=filled];
+"annotate-wf" -> "stage-output";
+subgraph cluster_inputs {
+label="Workflow Inputs";
+rank=same;
+style=dashed;
+"chrs" [fillcolor="#94DDF4", label=chrs, shape=record, style=filled];
+"dbsnp" [fillcolor="#94DDF4", label=dbsnp, shape=record, style=filled];
+"genomeversion" [fillcolor="#94DDF4", label=genomeversion, shape=record, style=filled];
+"genomeversion" [fillcolor="#94DDF4", label=genomeversion, shape=record, style=filled];
+"gnomaddir" [fillcolor="#94DDF4", label=gnomaddir, shape=record, style=filled];
+"snpeffdatadir" [fillcolor="#94DDF4", label=snpeffdatadir, shape=record, style=filled];
+"batchsize" [fillcolor="#94DDF4", label=batchsize, shape=record, style=filled];
+"fastadirs" [fillcolor="#94DDF4", label=fastadirs, shape=record, style=filled];
+"readmeinfo" [fillcolor="#94DDF4", label=readmeinfo, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"matchgenome" [fillcolor="#94DDF4", label=matchgenome, shape=record, style=filled];
+"phenotypesnofamilydir" [fillcolor="#94DDF4", label=phenotypesnofamilydir, shape=record, style=filled];
+"randomseed" [fillcolor="#94DDF4", label=randomseed, shape=record, style=filled];
+"trainingsetsize" [fillcolor="#94DDF4", label=trainingsetsize, shape=record, style=filled];
+"tagset" [fillcolor="#94DDF4", label=tagset, shape=record, style=filled];
+"tagset" [fillcolor="#94DDF4", label=tagset, shape=record, style=filled];
+"refdir" [fillcolor="#94DDF4", label=refdir, shape=record, style=filled];
+"phenotypesdir" [fillcolor="#94DDF4", label=phenotypesdir, shape=record, style=filled];
+"phenotypesdir" [fillcolor="#94DDF4", label=phenotypesdir, shape=record, style=filled];
+"expandregions" [fillcolor="#94DDF4", label=expandregions, shape=record, style=filled];
+"expandregions" [fillcolor="#94DDF4", label=expandregions, shape=record, style=filled];
+"expandregions" [fillcolor="#94DDF4", label=expandregions, shape=record, style=filled];
+"expandregions" [fillcolor="#94DDF4", label=expandregions, shape=record, style=filled];
+"mergeoutput" [fillcolor="#94DDF4", label=mergeoutput, shape=record, style=filled];
+"mergeoutput" [fillcolor="#94DDF4", label=mergeoutput, shape=record, style=filled];
+"mergeoutput" [fillcolor="#94DDF4", label=mergeoutput, shape=record, style=filled];
+"mergeoutput" [fillcolor="#94DDF4", label=mergeoutput, shape=record, style=filled];
+"regions" [fillcolor="#94DDF4", label=regions, shape=record, style=filled];
+"regions" [fillcolor="#94DDF4", label=regions, shape=record, style=filled];
+"regions" [fillcolor="#94DDF4", label=regions, shape=record, style=filled];
+"regions" [fillcolor="#94DDF4", label=regions, shape=record, style=filled];
+"pcacomponents" [fillcolor="#94DDF4", label=pcacomponents, shape=record, style=filled];
+"threads" [fillcolor="#94DDF4", label=threads, shape=record, style=filled];
+"threads" [fillcolor="#94DDF4", label=threads, shape=record, style=filled];
+}
+
+"chrs" -> "annotate-wf";
+"dbsnp" -> "annotate-wf";
+"genomeversion" -> "annotate-wf";
+"genomeversion" -> "make-libname";
+"gnomaddir" -> "annotate-wf";
+"snpeffdatadir" -> "annotate-wf";
+"batchsize" -> "batch-dirs";
+"fastadirs" -> "batch-dirs";
+"readmeinfo" -> "genreadme";
+"matchgenome" -> "lightning-choose-samples";
+"matchgenome" -> "lightning-slice-numpy-onehot";
+"matchgenome" -> "lightning-slice-numpy-onehot_logisticregression";
+"matchgenome" -> "lightning-slice-numpy-pca";
+"matchgenome" -> "lightning-slice-numpy";
+"matchgenome" -> "make-libname";
+"phenotypesnofamilydir" -> "lightning-choose-samples";
+"randomseed" -> "lightning-choose-samples";
+"trainingsetsize" -> "lightning-choose-samples";
+"tagset" -> "lightning-import_data";
+"tagset" -> "lightning-import_refs";
+"refdir" -> "lightning-import_refs";
+"phenotypesdir" -> "lightning-plot_1-2";
+"phenotypesdir" -> "lightning-plot_2-3";
+"expandregions" -> "lightning-slice-numpy-onehot";
+"expandregions" -> "lightning-slice-numpy-onehot_logisticregression";
+"expandregions" -> "lightning-slice-numpy-pca";
+"expandregions" -> "lightning-slice-numpy";
+"mergeoutput" -> "lightning-slice-numpy-onehot";
+"mergeoutput" -> "lightning-slice-numpy-onehot_logisticregression";
+"mergeoutput" -> "lightning-slice-numpy-pca";
+"mergeoutput" -> "lightning-slice-numpy";
+"regions" -> "lightning-slice-numpy-onehot";
+"regions" -> "lightning-slice-numpy-onehot_logisticregression";
+"regions" -> "lightning-slice-numpy-pca";
+"regions" -> "lightning-slice-numpy";
+"pcacomponents" -> "lightning-slice-numpy-pca";
+"threads" -> "lightning-slice-numpy-pca";
+"threads" -> "lightning-slice-numpy";
+subgraph cluster_outputs {
+label="Workflow Outputs";
+labelloc=b;
+rank=same;
+style=dashed;
+"readme" [fillcolor="#94DDF4", label=readme, shape=record, style=filled];
+"stagedannotationdir" [fillcolor="#94DDF4", label=stagedannotationdir, shape=record, style=filled];
+"stagednpydir" [fillcolor="#94DDF4", label=stagednpydir, shape=record, style=filled];
+"stagedonehotnpydir" [fillcolor="#94DDF4", label=stagedonehotnpydir, shape=record, style=filled];
+}
+
+"genreadme" -> "readme";
+"stage-output" -> "stagedannotationdir";
+"stage-output" -> "stagednpydir";
+"stage-output" -> "stagedonehotnpydir";
+}
index 3903bbbcdebe2c7c3a058fc6975d7ded386eaadb..d32a2d5fe1601a6f8d135c2178a60e763d1d8f0a 100644 (file)
@@ -13,8 +13,8 @@ hints:
   DockerRequirement:
     dockerPull: lightning
   ResourceRequirement:
-    coresMin: 8 #96
-    ramMin: 25000 #670000
+    coresMin: 64  #8 #96
+    ramMin: 300000 #25000 #670000
   arv:RuntimeConstraints:
     keep_cache: 6200
     outputDirType: keep_output_dir
@@ -41,6 +41,7 @@ arguments:
   - "-output-tiles=true"
   - "-batches=1"
   - "-batch=0"
+  - "-GOMAXPROCS=48"
   - prefix: "-save-incomplete-tiles="
     valueFrom: $(inputs.saveincomplete)
     separate: false
index d11a30b43d827e53c31d0203fc379ae42d688c0c..d760fc159a50ebf4dbed951a06393d12a3e148c7 100644 (file)
@@ -29,7 +29,7 @@ RUN apt-get install -qy --no-install-recommends wget \
   python3-matplotlib \
   git
 
-RUN pip3 install sklearn
+RUN pip3 install scikit-learn
 RUN pip3 install --upgrade scipy
 RUN pip3 install matplotlib