Include ending tag in tile sequence.
author Tom Clegg <tom@tomclegg.ca>
Mon, 20 Jan 2020 14:54:26 +0000 (09:54 -0500)
committer Tom Clegg <tom@tomclegg.ca>
Mon, 20 Jan 2020 14:54:26 +0000 (09:54 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

taglib.go
taglib_test.go
tilelib.go

index 367b9c4fff4e06047730379848acad38bb4ace01..91d4d4df299f0b6fe4ff8dfe2d6576cccfa5aed9 100644 (file)
--- a/taglib.go
+++ b/taglib.go
@@ -39,12 +39,7 @@ func (taglib *tagLibrary) Load(rdr io.Reader) error {
        return taglib.setTags(seqs)
 }
 
-type tagMatch struct {
-       id  tagID
-       pos int
-}
-
-func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos int)) {
+func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos, taglen int)) {
        var key tagmapKey
        for i := 0; i <= len(buf)-taglib.keylen; i++ {
                copy(key[:taglib.keylen], buf[i:])
@@ -54,7 +49,7 @@ func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos int)) {
                        // key portion matches, but not the entire tag
                        continue
                } else {
-                       fn(taginfo.id, i)
+                       fn(taginfo.id, i, len(taginfo.tagseq))
                        i += len(taginfo.tagseq) - 1 // don't try to match overlapping tags
                }
        }
index c023ab5fccc52a15422829cf1e8e44c0d5259379..fbddc81904a8083d18d6d84b992ed8ffb274b98d 100644 (file)
--- a/taglib_test.go
+++ b/taglib_test.go
@@ -19,6 +19,12 @@ type taglibSuite struct{}
 
 var _ = check.Suite(&taglibSuite{})
 
+type tagMatch struct {
+       id     tagID
+       pos    int
+       taglen int
+}
+
 func (s *taglibSuite) TestFindAllTinyData(c *check.C) {
        pr, pw, err := os.Pipe()
        c.Assert(err, check.IsNil)
@@ -35,10 +41,10 @@ gactctagcagagtggccagccac
        c.Assert(err, check.IsNil)
        haystack := []byte(`ggagaactgtgctccgccttcagaccccccccccccccccccccacacatgctagcgcgtcggggtgggggggggggggggggggggggggggactctagcagagtggccagccac`)
        var matches []tagMatch
-       taglib.FindAll(haystack, func(id tagID, pos int) {
-               matches = append(matches, tagMatch{id, pos})
+       taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
+               matches = append(matches, tagMatch{id, pos, taglen})
        })
-       c.Check(matches, check.DeepEquals, []tagMatch{{0, 0}, {1, 44}, {2, 92}})
+       c.Check(matches, check.DeepEquals, []tagMatch{{0, 0, 24}, {1, 44, 24}, {2, 92, 24}})
 }
 
 func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) {
@@ -80,10 +86,10 @@ func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) {
        c.Assert(err, check.IsNil)
        c.Logf("@%v find tags in input", time.Since(start))
        var matches []tagMatch
-       taglib.FindAll(haystack, func(id tagID, pos int) {
-               matches = append(matches, tagMatch{id, pos})
+       taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
+               matches = append(matches, tagMatch{id, pos, taglen})
        })
        c.Logf("@%v done", time.Since(start))
-       c.Check(matches[0], check.Equals, tagMatch{0, 0})
+       c.Check(matches[0], check.Equals, tagMatch{0, 0, tagsize})
        c.Check(matches[1].id, check.Equals, tagID(1))
 }
index cf06bb93ce77cf87391c5934f9ee105b734ff893..cfe91fb49f5fccecd9f43d4d9770f03c57cb083d 100644 (file)
--- a/tilelib.go
+++ b/tilelib.go
@@ -61,9 +61,9 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq,
                var path []tileLibRef
                tilestart := -1        // position in fasta of tile that ends here
                tiletagid := tagID(-1) // tag id starting tile that ends here
-               tilelib.taglib.FindAll(job.fasta, func(id tagID, pos int) {
+               tilelib.taglib.FindAll(job.fasta, func(id tagID, pos, taglen int) {
                        if tilestart >= 0 {
-                               path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos]))
+                               path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos+taglen]))
                        }
                        tilestart = pos
                        tiletagid = id