1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
// tagmapKeySize is the initial key length, in bases, that setTags assigns to
// keylen before comparing it against the length of each tag. Keys hold 2 bits
// per base (see the shifts in setTags and FindAll).
14 const tagmapKeySize = 32
21 id tagID // 0-based position in input tagset
// tagLibrary indexes a set of tag sequences by a packed key built from the
// leading keylen bytes of each tag (see setTags).
25 type tagLibrary struct {
// tagmap maps a packed key to the id and full sequence of the tag that
// produced it; setTags rejects two tags that would share a key.
26 tagmap map[tagmapKey]tagInfo
// Load reads tag sequences from rdr through a bufio.Scanner and hands the
// collected sequences to setTags, whose result it returns. A scanner error
// is checked before setTags is called.
//
// NOTE(review): the loop header and the branch bodies are not visible in this
// excerpt. Presumably lines beginning with '>' are skipped as headers and
// every other line (including an empty one) is stored as a tag; confirm.
31 func (taglib *tagLibrary) Load(rdr io.Reader) error {
33 scanner := bufio.NewScanner(rdr)
35 data := scanner.Bytes()
// The len check keeps data[0] from being read on an empty line.
36 if len(data) > 0 && data[0] == '>' {
// Store a copy: the slice returned by scanner.Bytes may be overwritten by a
// subsequent call to Scan.
38 seqs = append(seqs, append([]byte(nil), data...))
41 if err := scanner.Err(); err != nil {
44 return taglib.setTags(seqs)
47 func (taglib *tagLibrary) FindAll(buf []byte, fn func(id tagID, pos, taglen int)) {
49 valid := 0 // if valid < taglib.keylen, key has "no data" zeroes that are otherwise indistinguishable from "A"
50 for i, base := range buf {
51 if !isbase[int(base)] {
55 key = ((key << 2) | twobit[int(base)]) & taglib.keymask
58 if valid < taglib.keylen {
60 } else if taginfo, ok := taglib.tagmap[key]; !ok {
62 } else if tagstart := i - taglib.keylen + 1; len(taginfo.tagseq) > taglib.keylen && (len(buf) < i+len(taginfo.tagseq) || !bytes.Equal(taginfo.tagseq, buf[i:i+len(taginfo.tagseq)])) {
63 // key portion matches, but not the entire tag
66 fn(taginfo.id, tagstart, len(taginfo.tagseq))
67 valid = 0 // don't try to match overlapping tags
// Len returns the number of entries in tagmap. setTags stores one entry per
// tag and fails on a duplicate key, so after a successful load this is the
// number of tags.
72 func (taglib *tagLibrary) Len() int {
73 return len(taglib.tagmap)
// TagLen reports a tag length as an int.
// NOTE(review): the body is not visible in this excerpt — presumably it
// returns taglib.keylen; confirm.
76 func (taglib *tagLibrary) TagLen() int {
// twobit is a 256-entry table indexed by byte value. FindAll and setTags OR
// the entry for each input byte into the low bits of a key after shifting the
// key left by 2.
// NOTE(review): the entries assigned to the table are not visible here.
81 twobit = func() []tagmapKey {
82 r := make([]tagmapKey, 256)
// isbase is a 256-entry table indexed by byte value. FindAll tests it for
// every input byte before using that byte to extend the key.
// NOTE(review): the entries set to true are not visible here.
93 isbase = func() []bool {
94 r := make([]bool, 256)
// setTags rebuilds the library from tags: it chooses keylen, derives keymask,
// and fills a fresh tagmap with one entry per tag, keyed by the packed first
// keylen bytes of the tag. The tag at index i gets id i. It returns an error
// if two tags produce the same key.
107 func (taglib *tagLibrary) setTags(tags [][]byte) error {
// Start from the default key size, then compare it with each tag's length.
// NOTE(review): the body of the comparison is not visible here — presumably
// keylen is lowered to the shortest tag length, which is what makes the
// tag[:taglib.keylen] slice below safe; confirm.
108 taglib.keylen = tagmapKeySize
109 for _, t := range tags {
110 if l := len(t); taglib.keylen > l {
// Mask with the low 2*keylen bits set (2 bits per base).
// NOTE(review): when keylen is 32 the shift count is 64. This still yields an
// all-ones mask only if tagmapKey is a 64-bit unsigned integer (shift gives
// 0, subtraction wraps); its declaration is not visible here — confirm.
114 taglib.keymask = tagmapKey((1 << (taglib.keylen * 2)) - 1)
115 taglib.tagmap = map[tagmapKey]tagInfo{}
116 for i, tag := range tags {
// Pack the first keylen bytes, 2 bits each; earlier bytes end up in more
// significant positions.
118 for _, b := range tag[:taglib.keylen] {
119 key = (key << 2) | twobit[int(b)]
// Two tags with the same key could not be told apart by a lookup.
121 if _, ok := taglib.tagmap[key]; ok {
122 return fmt.Errorf("first %d bytes of tag %d (%x) are not unique", taglib.keylen, i, key)
// The full tag is kept alongside its id so FindAll can verify tags longer
// than the key.
124 taglib.tagmap[key] = tagInfo{tagID(i), tag}
// Tags builds a slice with one element per tagmap entry, stored at the index
// given by the entry's id. Each element is keylen bytes long and is decoded
// from the entry's key using lowercase 'a', 'c', 'g', 't'; the stored tagseq
// is not used, so only the key-length prefix of each tag is reproduced.
// NOTE(review): the step that advances key between positions and the return
// statement are not visible in this excerpt.
129 func (taglib *tagLibrary) Tags() [][]byte {
130 out := make([][]byte, len(taglib.tagmap))
// Inverse lookup: 2-bit code -> base letter.
131 untwobit := []byte{'a', 'c', 'g', 't'}
132 for key, info := range taglib.tagmap {
133 seq := make([]byte, taglib.keylen)
// Fill from the last position backwards, reading the low 2 bits of key.
134 for i := len(seq) - 1; i >= 0; i-- {
135 seq[i] = untwobit[int(key)&3]
138 out[int(info.id)] = seq