-skip-ooo: fix skipping 7 in seq like 0-1-7-0
author Tom Clegg <tom@tomclegg.ca>
Fri, 24 Apr 2020 17:28:08 +0000 (13:28 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Fri, 24 Apr 2020 17:28:08 +0000 (13:28 -0400)
0-1-7-2 -> skip 7 because we will accept 2
0-1-7-0 -> keep 7 because we won't accept 0

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

tilelib.go
tilelib_test.go

index c2ea54e8126120f57f0ebe141ae9ada004b479f0..ce9d95fc9856d26f78d8eaf96517076c267af71f 100644 (file)
@@ -101,12 +101,13 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq,
                        log.Tracef("%s %s found[%d] == %#v", filelabel, job.label, i, f)
                        if tilelib.skipOOO {
                                if f.tagid < last.tagid+1 {
-                                       // e.g., last=B, this=A
-                                       log.Debugf("%s %s skipped out-of-order tag %d (found at %d) because it follows tag %d (found at %d)", filelabel, job.label, f.tagid, f.pos, last.tagid, last.pos)
+                                       log.Debugf("%s %s skipped out-of-order tag %d (found at %d) because it appears after tag %d (found at %d)", filelabel, job.label, f.tagid, f.pos, last.tagid, last.pos)
                                        continue
                                }
-                               if f.tagid > last.tagid+1 && i+1 < len(found) && found[i+1].tagid <= f.tagid {
-                                       // e.g., last=A, this=C, next=B
+                               if f.tagid > last.tagid+1 && // accepting this tag would mean skipping some tags
+                                       i+1 < len(found) && // there is a "next" found tag after this one
+                                       found[i+1].tagid > last.tagid && // next found tag is usable (we haven't already passed it in accepted sequence)
+                                       found[i+1].tagid <= f.tagid { // next found tag is expected before this one (so we can't use both)
                                        log.Debugf("%s %s skipped out-of-order tag %d (found at %d) because it appears between tag %d (found at %d) and %d (found at %d)", filelabel, job.label, f.tagid, f.pos, last.tagid, last.pos, found[i+1].tagid, found[i+1].pos)
                                        continue
                                }
index 4024b830292f3d505c54d6b6bfab065a5c88f6a9..4e6ca0afa55b3903d6d3cbb09407bdbe6e21e723 100644 (file)
@@ -95,6 +95,22 @@ gactctagcagagtggccagccac
        c.Assert(err, check.IsNil)
        c.Check(tseq, check.DeepEquals, tileSeq{"test-seq": []tileLibRef{{0, 1}, {1, 1}, {2, 1}}})
 
+       // tags appear in seq: 0, 1, 3, 0, 4 -> skip second tag0
+       tilelib = &tileLibrary{taglib: &taglib, skipOOO: true}
+       tseq, err = tilelib.TileFasta("test-label", bytes.NewBufferString(`>test-seq
+ggagaactgtgctccgccttcaga
+cccccccccccccccccccc
+acacatgctagcgcgtcggggtgg
+ggggggggggggggggggggggg
+cctcccgagccgagccacccgtca
+ggggggggggggggggggggggg
+ggagaactgtgctccgccttcaga
+ggggggggggggggggggggggg
+gttattaataataacttatcatca
+`))
+       c.Assert(err, check.IsNil)
+       c.Check(tseq, check.DeepEquals, tileSeq{"test-seq": []tileLibRef{{0, 1}, {1, 1}, {3, 1}, {4, 1}}})
+
        // tags appear in seq: 0, 1, 3 -> don't skip
        tilelib = &tileLibrary{taglib: &taglib, skipOOO: true}
        tseq, err = tilelib.TileFasta("test-label", bytes.NewBufferString(`>test-seq