Drop duplicate tags, fix ref position tracking.
[lightning.git] / slicenumpy.go
index fe1a77497ee4e3325fa294fd7d0cf62d23605e74..fdaa02e658de2a59dc6b1bbbbdaad0ecb250f1ef 100644 (file)
@@ -192,52 +192,54 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
        type reftileinfo struct {
                variant  tileVariantID
                seqname  string // chr1
-               pos      int    // distance from start of chr1 to start of tile
+               pos      int    // distance from start of chromosome to starttag
                tiledata []byte // acgtggcaa...
        }
+       isdup := map[tagID]bool{}
        reftile := map[tagID]*reftileinfo{}
        for seqname, cseq := range refseq {
+               pos := 0
                for _, libref := range cseq {
-                       reftile[libref.Tag] = &reftileinfo{
-                               seqname:  seqname,
-                               variant:  libref.Variant,
-                               tiledata: reftiledata[libref],
+                       tiledata := reftiledata[libref]
+                       if len(tiledata) == 0 {
+                               err = fmt.Errorf("missing tiledata for tag %d variant %d in %s in ref", libref.Tag, libref.Variant, seqname)
+                               return 1
                        }
-               }
-       }
-
-       throttleCPU := throttle{Max: runtime.GOMAXPROCS(0)}
-       log.Info("reconstructing reference sequences")
-       for seqname, cseq := range refseq {
-               seqname, cseq := seqname, cseq
-               throttleCPU.Go(func() error {
-                       defer log.Printf("... %s done", seqname)
-                       pos := 0
-                       for _, libref := range cseq {
-                               rt := reftile[libref.Tag]
-                               rt.pos = pos
-                               if len(rt.tiledata) == 0 {
-                                       return fmt.Errorf("missing tiledata for tag %d variant %d in %s in ref", libref.Tag, libref.Variant, seqname)
+                       if isdup[libref.Tag] {
+                               log.Printf("dropping reference tile %+v from %s, tag not unique", libref, seqname)
+                       } else if reftile[libref.Tag] != nil {
+                               log.Printf("dropping reference tile %+v from %s, tag not unique", tileLibRef{Tag: libref.Tag, Variant: reftile[libref.Tag].variant}, reftile[libref.Tag].seqname)
+                               delete(reftile, libref.Tag)
+                               log.Printf("dropping reference tile %+v from %s, tag not unique", libref, seqname)
+                               isdup[libref.Tag] = true
+                       } else {
+                               reftile[libref.Tag] = &reftileinfo{
+                                       seqname:  seqname,
+                                       variant:  libref.Variant,
+                                       tiledata: tiledata,
+                                       pos:      pos,
                                }
-                               pos += len(rt.tiledata) - taglen
                        }
-                       return nil
-               })
+                       pos += len(tiledata) - taglen
+               }
+               log.Printf("... %s done, len %d", seqname, pos+taglen)
        }
-       throttleCPU.Wait()
 
        var mask *mask
        if *regionsFilename != "" {
+               log.Printf("loading regions from %s", *regionsFilename)
                mask, err = makeMask(*regionsFilename, *expandRegions)
                if err != nil {
                        return 1
                }
-               // Delete reftile entries for masked-out regions.
+               log.Printf("before applying mask, len(reftile) == %d", len(reftile))
+               log.Printf("deleting reftile entries for regions outside %d intervals", mask.Len())
                for tag, rt := range reftile {
                        if !mask.Check(strings.TrimPrefix(rt.seqname, "chr"), rt.pos, rt.pos+len(rt.tiledata)) {
                                delete(reftile, tag)
                        }
                }
+               log.Printf("after applying mask, len(reftile) == %d", len(reftile))
        }
 
        var toMerge [][]int16