"os"
"regexp"
"runtime"
+ "runtime/debug"
"sort"
"strconv"
"strings"
log.Printf("after applying mask, len(reftile) == %d", len(reftile))
}
+ type hgvsColSet map[hgvs.Variant][2][]int8
+ encodeHGVS := throttle{Max: len(refseq)}
+ encodeHGVSTodo := map[string]chan hgvsColSet{}
tmpHGVSCols := map[string]*os.File{}
- bufHGVSCols := map[string]*bufio.Writer{}
- encodeHGVSCols := map[string]*gob.Encoder{}
if *hgvsChunked {
for seqname := range refseq {
var f *os.File
bufw := bufio.NewWriterSize(f, 1<<24)
enc := gob.NewEncoder(bufw)
tmpHGVSCols[seqname] = f
- bufHGVSCols[seqname] = bufw
- encodeHGVSCols[seqname] = enc
+ todo := make(chan hgvsColSet, 128)
+ encodeHGVSTodo[seqname] = todo
+ encodeHGVS.Go(func() error {
+ for colset := range todo {
+ err := enc.Encode(colset)
+ if err != nil {
+ encodeHGVS.Report(err)
+ for range todo {
+ }
+ return err
+ }
+ }
+ return bufw.Flush()
+ })
}
}
seq[tv.Tag] = variants
}
for _, cg := range ent.CompactGenomes {
- if matchGenome.MatchString(cg.Name) {
- cgs[cg.Name] = cg
+ if !matchGenome.MatchString(cg.Name) {
+ continue
+ }
+ // pad to full slice size
+ // to avoid out-of-bounds
+ // checks later
+ if sliceSize := 2 * int(cg.EndTag-cg.StartTag); len(cg.Variants) < sliceSize {
+ cg.Variants = append(cg.Variants, make([]tileVariantID, sliceSize-len(cg.Variants))...)
}
+ cgs[cg.Name] = cg
}
return nil
})
for _, cg := range cgs {
idx := int(tag-tagstart) * 2
- if idx < len(cg.Variants) {
- for allele := 0; allele < 2; allele++ {
- v := cg.Variants[idx+allele]
- if v > 0 && len(variants[v].Sequence) > 0 {
- count[variants[v].Blake2b]++
- }
+ for allele := 0; allele < 2; allele++ {
+ v := cg.Variants[idx+allele]
+ if v > 0 && len(variants[v].Sequence) > 0 {
+ count[variants[v].Blake2b]++
}
}
}
continue
}
diffs, _ := hgvs.Diff(reftilestr, strings.ToUpper(string(tv.Sequence)), 0)
+ for i := range diffs {
+ diffs[i].Position += rt.pos
+ }
for _, diff := range diffs {
- diff.Position += rt.pos
fmt.Fprintf(annow, "%d,%d,%d,%s:g.%s,%s,%d,%s,%s,%s\n", tag, outcol, v, rt.seqname, diff.String(), rt.seqname, diff.Position, diff.Ref, diff.New, diff.Left)
}
if *hgvsChunked {
// =ref or a different variant in that
// position, or (-1) is lacking
// coverage / couldn't be diffed.
- hgvsCol := map[hgvs.Variant][2][]int8{}
+ hgvsCol := hgvsColSet{}
for _, diffs := range variantDiffs {
for _, diff := range diffs {
if _, ok := hgvsCol[diff]; ok {
}
}
}
- encodeHGVSCols[rt.seqname].Encode(hgvsCol)
+ encodeHGVSTodo[rt.seqname] <- hgvsCol
}
outcol++
}
}
}
seq = nil
+ cgs = nil
+ debug.FreeOSMemory()
throttleNumpyMem.Release()
if *mergeOutput || *hgvsSingle {
if err != nil {
return err
}
+ debug.FreeOSMemory()
}
log.Infof("%s: done (%d/%d)", infile, int(atomic.AddInt64(&done, 1)), len(infiles))
return nil
if *hgvsChunked {
log.Info("flushing hgvsCols temp files")
for seqname := range refseq {
- err = bufHGVSCols[seqname].Flush()
- if err != nil {
- return 1
- }
- bufHGVSCols[seqname] = nil // free buffer memory
+ close(encodeHGVSTodo[seqname])
+ }
+ err = encodeHGVS.Wait()
+ if err != nil {
+ return 1
}
for seqname := range refseq {
log.Infof("%s: reading hgvsCols from temp file", seqname)
if err != nil {
return 1
}
- var hgvsCols map[hgvs.Variant][2][]int8
+ var hgvsCols hgvsColSet
dec := gob.NewDecoder(bufio.NewReaderSize(f, 1<<24))
for err == nil {
err = dec.Decode(&hgvsCols)