+func (*exportNumpy) writeLibRefs(fnm string, tilelib *tileLibrary, librefs []tileLibRef) error {
+ f, err := os.OpenFile(fnm, os.O_CREATE|os.O_WRONLY, 0666)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ for i, libref := range librefs {
+ _, err = fmt.Fprintf(f, "%d,%d,%d\n", i, libref.Tag, libref.Variant)
+ if err != nil {
+ return err
+ }
+ }
+ return f.Close()
+}
+
+func cgs2array(tilelib *tileLibrary) (data []int16, rows, cols int, cgnames []string) {
+ for name := range tilelib.compactGenomes {
+ cgnames = append(cgnames, name)
+ }
+ sort.Strings(cgnames)
+
+ rows = len(tilelib.compactGenomes)
+ for _, cg := range tilelib.compactGenomes {
+ if cols < len(cg) {
+ cols = len(cg)
+ }
+ }
+
+ // flag low-quality tile variants so we can change to -1 below
+ lowqual := make([]map[tileVariantID]bool, cols/2)
+ for tag, variants := range tilelib.variant {
+ lq := lowqual[tag]
+ for varidx, hash := range variants {
+ if len(tilelib.seq[hash]) == 0 {
+ if lq == nil {
+ lq = map[tileVariantID]bool{}
+ lowqual[tag] = lq
+ }
+ lq[tileVariantID(varidx+1)] = true
+ }
+ }
+ }
+
+ data = make([]int16, rows*cols)
+ for row, name := range cgnames {
+ for i, v := range tilelib.compactGenomes[name] {
+ if v > 0 && lowqual[i/2][v] {
+ data[row*cols+i] = -1
+ } else {
+ data[row*cols+i] = int16(v)
+ }
+ }
+ }
+
+ return
+}
+
+func recodeOnehot(in []int16, incols int) (out []int16, librefs []tileLibRef, outcols int) {
+ rows := len(in) / incols
+ maxvalue := make([]int16, incols)
+ for row := 0; row < rows; row++ {
+ for col := 0; col < incols; col++ {
+ if v := in[row*incols+col]; maxvalue[col] < v {
+ maxvalue[col] = v
+ }
+ }
+ }
+ outcol := make([]int, incols)
+ dropped := 0
+ for incol, maxv := range maxvalue {
+ outcol[incol] = outcols
+ if maxv == 0 {
+ dropped++
+ }
+ for v := 1; v <= int(maxv); v++ {
+ librefs = append(librefs, tileLibRef{Tag: tagID(incol), Variant: tileVariantID(v)})
+ outcols++
+ }
+ }
+ log.Printf("recodeOnehot: dropped %d input cols with zero maxvalue", dropped)
+
+ out = make([]int16, rows*outcols)
+ for inidx, row := 0, 0; row < rows; row++ {
+ outrow := out[row*outcols:]
+ for col := 0; col < incols; col++ {
+ if v := in[inidx]; v > 0 {
+ outrow[outcol[col]+int(v)-1] = 1
+ }
+ inidx++
+ }
+ }
+ return
+}
+