Name: "lightning pca-go",
Client: arvados.NewClientFromEnv(),
ProjectUUID: *projectUUID,
- RAM: 432000000000,
+ RAM: 100000000000, // maybe 10x input size?
VCPUs: 2,
Priority: *priority,
}
}
defer input.Close()
}
+ log.Print("reading")
cgs, err := ReadCompactGenomes(input)
if err != nil {
return 1
if err != nil {
return 1
}
+ log.Print("sorting")
sort.Slice(cgs, func(i, j int) bool { return cgs[i].Name < cgs[j].Name })
+ log.Print("converting cgs to array")
data, rows, cols := cgs2array(cgs)
if *onehot {
+ log.Printf("recode one-hot: %d rows, %d cols", rows, cols)
data, cols = recodeOnehot(data, cols)
}
- pca, err := nlp.NewPCA(*components).FitTransform(array2matrix(rows, cols, data).T())
+ cgs = nil
+
+ log.Printf("creating matrix backed by array: %d rows, %d cols", rows, cols)
+ mtx := array2matrix(rows, cols, data).T()
+
+ log.Print("fitting")
+ transformer := nlp.NewPCA(*components)
+ transformer.Fit(mtx)
+ log.Printf("transforming")
+ mtx, err = transformer.Transform(mtx)
if err != nil {
return 1
}
+ mtx = mtx.T()
- pca = pca.T()
- rows, cols = pca.Dims()
+ rows, cols = mtx.Dims()
+ log.Printf("copying result to numpy output array: %d rows, %d cols", rows, cols)
out := make([]float64, rows*cols)
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
- out[i*cols+j] = pca.At(i, j)
+ out[i*cols+j] = mtx.At(i, j)
}
}
return 1
}
npw.Shape = []int{rows, cols}
+ log.Printf("writing numpy: %d rows, %d cols", rows, cols)
npw.WriteFloat64(out)
err = bufw.Flush()
if err != nil {
if err != nil {
return 1
}
+ log.Print("done")
return 0
}