}
cmd.cgnames = nil
- taglen := -1
+ var tagset [][]byte
DecodeLibrary(in0, strings.HasSuffix(infiles[0], ".gz"), func(ent *LibraryEntry) error {
if len(ent.TagSet) > 0 {
- taglen = len(ent.TagSet[0])
+ tagset = ent.TagSet
}
for _, cseq := range ent.CompactSequences {
if cseq.Name == *ref || *ref == "" {
err = fmt.Errorf("%s: reference sequence not found", infiles[0])
return 1
}
- if taglen < 0 {
+ if len(tagset) == 0 {
err = fmt.Errorf("tagset not found")
return 1
}
+
+ taglib := &tagLibrary{}
+ err = taglib.setTags(tagset)
+ if err != nil {
+ return 1
+ }
+ taglen := taglib.TagLen()
+
if len(cmd.cgnames) == 0 {
err = fmt.Errorf("no genomes found matching regexp %q", cmd.filter.MatchGenome)
return 1
err = fmt.Errorf("missing tiledata for tag %d variant %d in %s in ref", libref.Tag, libref.Variant, seqname)
return 1
}
+ foundthistag := false
+ taglib.FindAll(tiledata[:len(tiledata)-1], func(tagid tagID, offset, _ int) {
+ if !foundthistag && tagid == libref.Tag {
+ foundthistag = true
+ return
+ }
+ if dupref, ok := reftile[tagid]; ok {
+ log.Printf("dropping reference tile %+v from %s @ %d, tag not unique, also found inside %+v from %s @ %d", tileLibRef{Tag: tagid, Variant: dupref.variant}, dupref.seqname, dupref.pos, libref, seqname, pos+offset+1)
+ delete(reftile, tagid)
+ } else {
+ log.Printf("found tag %d at offset %d inside tile variant %+v on %s @ %d", tagid, offset, libref, seqname, pos+offset+1)
+ }
+ isdup[tagid] = true
+ })
if isdup[libref.Tag] {
log.Printf("dropping reference tile %+v from %s @ %d, tag not unique", libref, seqname, pos)
} else if reftile[libref.Tag] != nil {
throttleNumpyMem.Release()
}
if *onehotSingle {
- log.Infof("%04d: keeping onehot chunk in memory (rows=%d, cols=%d, mem=%d)", infileIdx, len(cmd.cgnames), len(onehotChunk), (len(cmd.cgnames)+int(onehotXrefSize))*len(onehotChunk))
onehotIndirect[infileIdx] = onehotChunk2Indirect(onehotChunk)
onehotXrefs[infileIdx] = onehotXref
+ n := len(onehotIndirect[infileIdx][0])
+ log.Infof("%04d: keeping onehot coordinates in memory (n=%d, mem=%d)", infileIdx, n, n*8)
}
if !(*onehotSingle || *onehotChunked) || *mergeOutput || *hgvsSingle {
log.Infof("%04d: preparing numpy", infileIdx)
var onehot [][]int8
var xref []onehotXref
for homcol := 4; homcol < len(obs); homcol += 2 {
- p := [2]float64{
- pvalue(obs[homcol], cmd.chi2Cases),
- pvalue(obs[homcol+1], cmd.chi2Cases),
- }
- if cmd.chi2PValue < 1 && !(p[0] < cmd.chi2PValue || p[1] < cmd.chi2PValue) {
- continue
- }
for het := 0; het < 2; het++ {
+ p := pvalue(obs[homcol+het], cmd.chi2Cases)
+ if cmd.chi2PValue < 1 && !(p < cmd.chi2PValue) {
+ continue
+ }
onehot = append(onehot, bool2int8(obs[homcol+het]))
xref = append(xref, onehotXref{
tag: tag,
variant: tileVariantID(homcol / 2),
het: het == 1,
- pvalue: p[het],
+ pvalue: p,
})
}
}