From 761c4fd02d57e4e8c1190a94a3956e9de9f643df Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 20 Jan 2020 09:54:26 -0500 Subject: [PATCH] Include ending tag in tile sequence. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- taglib.go | 9 ++------- taglib_test.go | 18 ++++++++++++------ tilelib.go | 4 ++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/taglib.go b/taglib.go index 367b9c4fff..91d4d4df29 100644 --- a/taglib.go +++ b/taglib.go @@ -39,12 +39,7 @@ func (taglib *tagLibrary) Load(rdr io.Reader) error { return taglib.setTags(seqs) } -type tagMatch struct { - id tagID - pos int -} - -func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos int)) { +func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos, taglen int)) { var key tagmapKey for i := 0; i <= len(buf)-taglib.keylen; i++ { copy(key[:taglib.keylen], buf[i:]) @@ -54,7 +49,7 @@ func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos int)) { // key portion matches, but not the entire tag continue } else { - fn(taginfo.id, i) + fn(taginfo.id, i, len(taginfo.tagseq)) i += len(taginfo.tagseq) - 1 // don't try to match overlapping tags } } diff --git a/taglib_test.go b/taglib_test.go index c023ab5fcc..fbddc81904 100644 --- a/taglib_test.go +++ b/taglib_test.go @@ -19,6 +19,12 @@ type taglibSuite struct{} var _ = check.Suite(&taglibSuite{}) +type tagMatch struct { + id tagID + pos int + taglen int +} + func (s *taglibSuite) TestFindAllTinyData(c *check.C) { pr, pw, err := os.Pipe() c.Assert(err, check.IsNil) @@ -35,10 +41,10 @@ gactctagcagagtggccagccac c.Assert(err, check.IsNil) haystack := []byte(`ggagaactgtgctccgccttcagaccccccccccccccccccccacacatgctagcgcgtcggggtgggggggggggggggggggggggggggactctagcagagtggccagccac`) var matches []tagMatch - taglib.FindAll(haystack, func(id tagID, pos int) { - matches = append(matches, tagMatch{id, pos}) + taglib.FindAll(haystack, func(id tagID, pos, taglen int) { + matches = append(matches, tagMatch{id, pos, taglen}) }) - c.Check(matches, check.DeepEquals, []tagMatch{{0, 0}, {1, 44}, {2, 92}}) + c.Check(matches, check.DeepEquals, []tagMatch{{0, 0, 24}, {1, 44, 24}, {2, 92, 24}}) } func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) { @@ -80,10 +86,10 @@ func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) { c.Assert(err, check.IsNil) c.Logf("@%v find tags in input", time.Since(start)) var matches []tagMatch - taglib.FindAll(haystack, func(id tagID, pos int) { - matches = append(matches, tagMatch{id, pos}) + taglib.FindAll(haystack, func(id tagID, pos, taglen int) { + matches = append(matches, tagMatch{id, pos, taglen}) }) c.Logf("@%v done", time.Since(start)) - c.Check(matches[0], check.Equals, tagMatch{0, 0}) + c.Check(matches[0], check.Equals, tagMatch{0, 0, tagsize}) c.Check(matches[1].id, check.Equals, tagID(1)) } diff --git a/tilelib.go b/tilelib.go index cf06bb93ce..cfe91fb49f 100644 --- a/tilelib.go +++ b/tilelib.go @@ -61,9 +61,9 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq, var path []tileLibRef tilestart := -1 // position in fasta of tile that ends here tiletagid := tagID(-1) // tag id starting tile that ends here - tilelib.taglib.FindAll(job.fasta, func(id tagID, pos int) { + tilelib.taglib.FindAll(job.fasta, func(id tagID, pos, taglen int) { if tilestart >= 0 { - path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos])) + path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos+taglen])) } tilestart = pos tiletagid = id -- 2.30.2