19780: Fix indexing error.
[lightning.git] / tilelib.go
index 550563342c3ac9728c9d05eeb7e85c8f7ab3abb0..b6dc3671207c619d6ffe578e5a292f69eab334af 100644 (file)
@@ -61,6 +61,7 @@ type tileLibrary struct {
        retainNoCalls       bool
        skipOOO             bool
        retainTileSequences bool
+       useDups             bool
 
        taglib         *tagLibrary
        variant        [][][blake2b.Size256]byte
@@ -543,12 +544,13 @@ func (tilelib *tileLibrary) dump(out io.Writer) {
 }
 
 type importStats struct {
-       InputFile              string
-       InputLabel             string
-       InputLength            int
-       InputCoverage          int
-       PathLength             int
-       DroppedOutOfOrderTiles int
+       InputFile             string
+       InputLabel            string
+       InputLength           int
+       InputCoverage         int
+       PathLength            int
+       DroppedRepeatedTags   int
+       DroppedOutOfOrderTags int
 }
 
 func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChromosome *regexp.Regexp, isRef bool) (tileSeq, []importStats, error) {
@@ -570,6 +572,8 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                                todo <- jobT{seqlabel, append([]byte(nil), fasta...)}
                                seqlabel, fasta = strings.SplitN(string(buf[1:]), " ", 2)[0], fasta[:0]
                                log.Debugf("%s %s reading fasta", filelabel, seqlabel)
+                       } else if len(buf) > 0 && buf[0] == '#' {
+                               // ignore lines starting with '#' (comments in test data)
                        } else {
                                fasta = append(fasta, bytes.ToLower(buf)...)
                        }
@@ -605,14 +609,33 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                        log.Warnf("%s %s no tags found", filelabel, job.label)
                }
 
-               skipped := 0
+               droppedDup := 0
+               if !tilelib.useDups {
+                       // Remove every occurrence (including the first) of any tag that appears more than once
+                       dup := map[tagID]bool{}
+                       for _, ft := range found {
+                               _, dup[ft.tagid] = dup[ft.tagid]
+                       }
+                       dst := 0
+                       for _, ft := range found {
+                               if !dup[ft.tagid] {
+                                       found[dst] = ft
+                                       dst++
+                               }
+                       }
+                       droppedDup = len(found) - dst
+                       log.Infof("%s %s dropping %d non-unique tags", filelabel, job.label, droppedDup)
+                       found = found[:dst]
+               }
+
+               droppedOOO := 0
                if tilelib.skipOOO {
-                       log.Infof("%s %s keeping longest increasing subsequence", filelabel, job.label)
                        keep := longestIncreasingSubsequence(len(found), func(i int) int { return int(found[i].tagid) })
                        for i, x := range keep {
                                found[i] = found[x]
                        }
-                       skipped = len(found) - len(keep)
+                       droppedOOO = len(found) - len(keep)
+                       log.Infof("%s %s dropping %d out-of-order tags", filelabel, job.label, droppedOOO)
                        found = found[:len(keep)]
                }
 
@@ -651,14 +674,15 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                ret[job.label] = pathcopy
 
                basesIn := countBases(job.fasta)
-               log.Infof("%s %s fasta in %d coverage in %d path len %d low-quality %d skipped-out-of-order %d", filelabel, job.label, len(job.fasta), basesIn, len(path), lowquality, skipped)
+               log.Infof("%s %s fasta in %d coverage in %d path len %d low-quality %d", filelabel, job.label, len(job.fasta), basesIn, len(path), lowquality)
                stats = append(stats, importStats{
-                       InputFile:              filelabel,
-                       InputLabel:             job.label,
-                       InputLength:            len(job.fasta),
-                       InputCoverage:          basesIn,
-                       PathLength:             len(path),
-                       DroppedOutOfOrderTiles: skipped,
+                       InputFile:             filelabel,
+                       InputLabel:            job.label,
+                       InputLength:           len(job.fasta),
+                       InputCoverage:         basesIn,
+                       PathLength:            len(path),
+                       DroppedOutOfOrderTags: droppedOOO,
+                       DroppedRepeatedTags:   droppedDup,
                })
 
                totalPathLen += len(path)