cmd.cgnames = nil
var tagset [][]byte
- DecodeLibrary(in0, strings.HasSuffix(infiles[0], ".gz"), func(ent *LibraryEntry) error {
+ err = DecodeLibrary(in0, strings.HasSuffix(infiles[0], ".gz"), func(ent *LibraryEntry) error {
if len(ent.TagSet) > 0 {
tagset = ent.TagSet
}
onehotChunkSize = make([]uint32, len(infiles))
onehotXrefs = make([][]onehotXref, len(infiles))
}
+ chunkStartTag := make([]tagID, len(infiles))
throttleMem := throttle{Max: cmd.threads} // TODO: estimate using mem and data size
throttleNumpyMem := throttle{Max: cmd.threads/2 + 1}
}
tagstart := cgs[cmd.cgnames[0]].StartTag
tagend := cgs[cmd.cgnames[0]].EndTag
+ chunkStartTag[infileIdx] = tagstart
// TODO: filters
continue
}
if v == 0 {
- out[outidx] = -1 // 0 better?
+ out[outidx] = 0 // tag not found / spanning tile
} else if variants, ok := seq[tag]; ok && int(v) < len(variants) && len(variants[v].Sequence) > 0 {
out[outidx] = int16(variantRemap[tag-tagstart][v])
} else {
- out[outidx] = -1
+ out[outidx] = -1 // low quality tile variant
}
if tag == cmd.debugTag {
log.Printf("tag %d row %d col %d outidx %d v %d out %d", tag, row, col, outidx, v, out[outidx])
return 1
}
fnm = fmt.Sprintf("%s/onehot-columns.npy", *outputDir)
- err = writeNumpyInt32(fnm, onehotXref2int32(xrefs), 4, len(xrefs))
+ err = writeNumpyInt32(fnm, onehotXref2int32(xrefs), 5, len(xrefs))
if err != nil {
return 1
}
}
+ if !*mergeOutput && !*onehotChunked && !*onehotSingle {
+ tagoffsetFilename := *outputDir + "/chunk-tag-offset.csv"
+ log.Infof("writing tag offsets to %s", tagoffsetFilename)
+ var f *os.File
+ f, err = os.Create(tagoffsetFilename)
+ if err != nil {
+ return 1
+ }
+ defer f.Close()
+ for idx, offset := range chunkStartTag {
+ _, err = fmt.Fprintf(f, "%q,%d\n", fmt.Sprintf("matrix.%04d.npy", idx), offset)
+ if err != nil {
+ err = fmt.Errorf("write %s: %w", tagoffsetFilename, err)
+ return 1
+ }
+ }
+ err = f.Close()
+ if err != nil {
+ err = fmt.Errorf("close %s: %w", tagoffsetFilename, err)
+ return 1
+ }
+ }
return 0
}
type onehotXref struct {
tag tagID
variant tileVariantID
- het bool
+ hom bool
pvalue float64
}
xref = append(xref, onehotXref{
tag: tag,
variant: tileVariantID(col >> 1),
- het: col&1 == 1,
+ hom: col&1 == 0,
pvalue: p,
})
}
// P-value row contains 1000000x actual p-value.
func onehotXref2int32(xrefs []onehotXref) []int32 {
xcols := len(xrefs)
- xdata := make([]int32, 4*xcols)
+ xdata := make([]int32, 5*xcols)
for i, xref := range xrefs {
xdata[i] = int32(xref.tag)
xdata[xcols+i] = int32(xref.variant)
- if xref.het {
+ if xref.hom {
xdata[xcols*2+i] = 1
}
xdata[xcols*3+i] = int32(xref.pvalue * 1000000)
+ xdata[xcols*4+i] = int32(-math.Log10(xref.pvalue) * 1000000)
}
return xdata
}