1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
// tagmapKeySize is the initial key length, in bases, that setTags
// assigns to a tagLibrary before examining tag lengths. Keys pack two
// bits per base, so 32 bases occupy 64 bits.
// NOTE(review): presumably tagmapKey is a 64-bit integer type; its
// declaration is not visible here -- confirm.
13 const tagmapKeySize = 32
20 id tagID // 0-based position in input tagset
// tagLibrary is a set of tags indexed by a packed key built from each
// tag's leading bases. It is populated by setTags (via Load) and
// searched by FindAll.
24 type tagLibrary struct {
// tagmap maps a key -- the first keylen bases of a tag, packed two
// bits per base -- to the tagInfo stored for that tag by setTags.
25 tagmap map[tagmapKey]tagInfo
// Load reads tag sequences from rdr, one per scanned line, and
// installs them with setTags, whose result it returns.
//
// Each line is tested for a leading '>' (the FASTA header marker).
// NOTE(review): the statement between that test and the append is not
// visible here -- presumably header lines are excluded from seqs;
// confirm.
//
// The scanner's error is checked before setTags is called. Note that
// bufio.Scanner rejects lines longer than its maximum token size
// unless a larger buffer is configured.
30 func (taglib *tagLibrary) Load(rdr io.Reader) error {
32 scanner := bufio.NewScanner(rdr)
34 data := scanner.Bytes()
35 if len(data) > 0 && data[0] == '>' {
// Append a private copy: the slice returned by scanner.Bytes may be
// overwritten by a subsequent call to Scan.
37 seqs = append(seqs, append([]byte(nil), data...))
40 if err := scanner.Err(); err != nil {
43 return taglib.setTags(seqs)
// FindAll reads sequence data from in one byte at a time and calls fn
// for each tag whose key matches the most recent keylen bases.
//
// A rolling key is maintained by shifting each base's two-bit code in
// and masking with keymask. Once at least keylen bases have
// accumulated in the window, the key is looked up in tagmap. On a hit,
// fn receives the tag's id, offset-keylen as pos, and the length of
// the stored tag sequence as taglen; the window is then emptied so
// that overlapping tags are not reported.
//
// If passthrough is non-nil, every byte that gets past the line-break
// and '>'/' ' checks is written to it.
//
// An error is returned when a '>' or ' ' is read as a data byte, and
// when a matched tag's stored sequence length differs from keylen.
// NOTE(review): setTags stores each whole input tag, so a match on a
// tag longer than keylen would take that assertion path -- confirm
// that all tags are expected to have equal length.
//
// NOTE(review): several statements (EOF and read-error returns, the
// outcome of the Peek branch, handling of non-base characters, and
// the maintenance of offset) are not visible in this excerpt and are
// therefore not described.
46 func (taglib *tagLibrary) FindAll(in *bufio.Reader, passthrough io.Writer, fn func(id tagID, pos, taglen int)) error {
// Capacity is 1000 keys' worth, so the compaction below (triggered
// when the window fills) runs only occasionally.
47 var window = make([]byte, 0, taglib.keylen*1000)
50 base, err := in.ReadByte()
53 } else if err != nil {
55 } else if base == '\r' || base == '\n' {
// At a line break, look ahead one byte for a '>' at the start of the
// next line, without consuming it.
56 if buf, err := in.Peek(1); err == nil && len(buf) > 0 && buf[0] == '>' {
58 } else if err == io.EOF {
62 } else if base == '>' || base == ' ' {
63 return fmt.Errorf("unexpected char %q at offset %d in fasta data", base, offset)
66 if passthrough != nil {
67 _, err = passthrough.Write([]byte{base})
// isbase is indexed by byte value; a false entry marks a character
// that is not a known base.
72 if !isbase[int(base)] {
73 // 'N' or various other chars meaning exact
79 window = append(window, base)
// Window is full: keep only the newest keylen bytes, moved to the
// front, so appending can continue without reallocating.
80 if len(window) == cap(window) {
81 copy(window, window[len(window)-taglib.keylen:])
82 window = window[:taglib.keylen]
// Shift the new base's two-bit code into the low bits of the key and
// mask off anything older than keylen bases.
84 key = ((key << 2) | twobit[int(base)]) & taglib.keymask
// No lookup until keylen bases have accumulated since the window was
// last emptied.
86 if len(window) < taglib.keylen {
88 } else if taginfo, ok := taglib.tagmap[key]; !ok {
90 } else if len(taginfo.tagseq) != taglib.keylen {
91 return fmt.Errorf("assertion failed: len(%q) != keylen %d", taginfo.tagseq, taglib.keylen)
93 fn(taginfo.id, offset-taglib.keylen, len(taginfo.tagseq))
94 window = window[:0] // don't try to match overlapping tags
// Len returns the number of entries in the library's tagmap, i.e. the
// number of tags stored by setTags.
100 func (taglib *tagLibrary) Len() int {
101 return len(taglib.tagmap)
// TagLen reports a length as an int.
// NOTE(review): the body is not visible here -- presumably it returns
// taglib.keylen, the key length chosen by setTags; confirm.
104 func (taglib *tagLibrary) TagLen() int {
// twobit is a 256-entry lookup table indexed by byte value. FindAll
// and setTags OR its entries into the low two bits of a key.
// NOTE(review): the assignments that fill the table are not visible
// here -- presumably a/c/g/t map to 0..3, consistent with the
// untwobit table in Tags; confirm.
109 twobit = func() []tagmapKey {
110 r := make([]tagmapKey, 256)
// isbase is a 256-entry lookup table indexed by byte value. FindAll
// treats a byte whose entry is false as a non-base character.
// NOTE(review): the assignments that mark which bytes are bases are
// not visible here -- confirm which characters (and which letter
// cases) are accepted.
121 isbase = func() []bool {
122 r := make([]bool, 256)
// setTags rebuilds the library from tags. It chooses the key length,
// derives keymask from it, and indexes every tag in a fresh tagmap by
// the first keylen bases packed two bits per base. Each tag's ID is
// its index in tags.
//
// It returns an error if two tags produce the same key.
135 func (taglib *tagLibrary) setTags(tags [][]byte) error {
// Start from the maximum key length.
136 taglib.keylen = tagmapKeySize
137 for _, t := range tags {
// A tag shorter than the current key length has been found.
// NOTE(review): the branch body is not visible here -- presumably
// keylen is lowered to l; confirm.
138 if l := len(t); taglib.keylen > l {
// keymask has the low keylen*2 bits set. The untyped 1 takes type
// tagmapKey in this non-constant shift, so when keylen*2 equals the
// type's bit width the shift yields 0 and the subtraction wraps to
// all bits set (assuming tagmapKey is 64 bits wide -- confirm).
142 taglib.keymask = tagmapKey((1 << (taglib.keylen * 2)) - 1)
143 taglib.tagmap = map[tagmapKey]tagInfo{}
144 for i, tag := range tags {
// Exactly keylen bases are packed, so no masking is needed here.
146 for _, b := range tag[:taglib.keylen] {
147 key = (key << 2) | twobit[int(b)]
// Keys must be unique: FindAll identifies a tag by its key alone.
149 if _, ok := taglib.tagmap[key]; ok {
150 return fmt.Errorf("first %d bytes of tag %d (%x) are not unique", taglib.keylen, i, key)
// The whole input tag is stored, not just its first keylen bases.
152 taglib.tagmap[key] = tagInfo{tagID(i), tag}
// Tags returns one sequence per tagmap entry, placed at the index
// given by the entry's tag ID. Each sequence is keylen bytes long and
// is decoded from the entry's key as lowercase a/c/g/t; it is not a
// copy of the stored tagseq, so only the first keylen bases of each
// original tag are represented.
157 func (taglib *tagLibrary) Tags() [][]byte {
// IDs assigned by setTags are indices into its input, so they are
// used directly as positions in out.
158 out := make([][]byte, len(taglib.tagmap))
159 untwobit := []byte{'a', 'c', 'g', 't'}
160 for key, info := range taglib.tagmap {
161 seq := make([]byte, taglib.keylen)
// Fill from the end: setTags shifts earlier bases left, so the last
// base occupies the key's low two bits.
// NOTE(review): the step that advances key between iterations is not
// visible here -- presumably a right shift by 2; confirm.
162 for i := len(seq) - 1; i >= 0; i-- {
163 seq[i] = untwobit[int(key)&3]
166 out[int(info.id)] = seq