return 0
}
-type goPCA struct{}
+type goPCA struct {
+ filter filter
+}
func (cmd *goPCA) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
var err error
outputFilename := flags.String("o", "-", "output `file`")
components := flags.Int("components", 4, "number of components")
onehot := flags.Bool("one-hot", false, "recode tile variants as one-hot")
+ cmd.filter.Flags(flags)
err = flags.Parse(args)
if err == flag.ErrHelp {
err = nil
Name: "lightning pca-go",
Client: arvados.NewClientFromEnv(),
ProjectUUID: *projectUUID,
- RAM: 100000000000, // maybe 10x input size?
- VCPUs: 2,
+ RAM: 300000000000, // maybe 10x input size?
+ VCPUs: 16,
Priority: *priority,
}
err = runner.TranslatePaths(inputFilename)
return 1
}
runner.Args = []string{"pca-go", "-local=true", fmt.Sprintf("-one-hot=%v", *onehot), "-i", *inputFilename, "-o", "/mnt/output/pca.npy"}
+ runner.Args = append(runner.Args, cmd.filter.Args()...)
var output string
output, err = runner.Run()
if err != nil {
defer input.Close()
}
log.Print("reading")
- tilelib := tileLibrary{
+ tilelib := &tileLibrary{
retainNoCalls: true,
compactGenomes: map[string][]tileVariantID{},
}
return 1
}
+ log.Info("filtering")
+ cmd.filter.Apply(tilelib)
+ log.Info("tidying")
+ tilelib.Tidy()
+
log.Print("converting cgs to array")
- data, rows, cols := cgs2array(&tilelib)
+ data, rows, cols, _ := cgs2array(tilelib)
if *onehot {
log.Printf("recode one-hot: %d rows, %d cols", rows, cols)
data, _, cols = recodeOnehot(data, cols)
}
+ tilelib = nil
log.Printf("creating matrix backed by array: %d rows, %d cols", rows, cols)
mtx := array2matrix(rows, cols, data).T()