From e301ae5f984acd04d6521c8c452164e84a3a0e40 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 20 Oct 2022 10:07:11 -0400 Subject: [PATCH] 19524: Limit size of PCA input matrix. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- slicenumpy.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/slicenumpy.go b/slicenumpy.go index 62982f5407..a9fee14c8d 100644 --- a/slicenumpy.go +++ b/slicenumpy.go @@ -80,7 +80,7 @@ func (cmd *sliceNumpy) run(prog string, args []string, stdin io.Reader, stdout, onehotChunked := flags.Bool("chunked-onehot", false, "generate one-hot tile-based matrix per input chunk") onlyPCA := flags.Bool("pca", false, "generate pca matrix") pcaComponents := flags.Int("pca-components", 4, "number of PCA components") - maxPCATiles := flags.Int("max-pca-tiles", 100000, "maximum tiles to use as PCA input (filter, then drop every 2nd colum pair until below max)") + maxPCATiles := flags.Int("max-pca-tiles", 0, "maximum tiles to use as PCA input (filter, then drop every 2nd colum pair until below max)") debugTag := flags.Int("debug-tag", -1, "log debugging details about specified tag") flags.IntVar(&cmd.threads, "threads", 16, "number of memory-hungry assembly threads, and number of VCPUs to request for arvados container") flags.StringVar(&cmd.chi2CaseControlFile, "chi2-case-control-file", "", "tsv file or directory indicating cases and controls for Χ² test (if directory, all .tsv files will be read)") @@ -136,6 +136,7 @@ func (cmd *sliceNumpy) run(prog string, args []string, stdin io.Reader, stdout, "-chunked-onehot=" + fmt.Sprintf("%v", *onehotChunked), "-pca=" + fmt.Sprintf("%v", *onlyPCA), "-pca-components=" + fmt.Sprintf("%d", *pcaComponents), + "-max-pca-tiles=" + fmt.Sprintf("%d", *maxPCATiles), "-chi2-case-control-file=" + cmd.chi2CaseControlFile, "-chi2-case-control-column=" + cmd.chi2CaseControlColumn, "-chi2-p-value=" + fmt.Sprintf("%f", cmd.chi2PValue), @@ -1123,7 +1124,7 @@ func (cmd *sliceNumpy) run(prog string, args []string, stdin io.Reader, stdout, } log.Printf("have %d one-hot cols", cols) stride := 1 - for cols > *maxPCATiles*2 { + for *maxPCATiles > 0 && cols > *maxPCATiles*2 { cols = cols / 2 stride = stride * 2 } -- 2.30.2