1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
// tagmapKeySize is the value setTags assigns to keylen before it inspects
// the lengths of the supplied tags.
13 const tagmapKeySize = 32
20 id tagID // 0-based position in input tagset
// tagLibrary holds a set of tags indexed for lookup by a packed key.
24 type tagLibrary struct {
// tagmap maps the 2-bit-per-byte packing of a tag's first keylen bytes
// (built in setTags) to that tag's id and full sequence.
25 tagmap map[tagmapKey]tagInfo
// Load reads tag sequences from rdr with a bufio.Scanner, collects them,
// and installs them by calling setTags. It returns the result of setTags.
// NOTE(review): a non-nil scanner.Err() is presumably returned to the
// caller — confirm.
30 func (taglib *tagLibrary) Load(rdr io.Reader) error {
32 scanner := bufio.NewScanner(rdr)
34 data := scanner.Bytes()
// NOTE(review): lines beginning with '>' are presumably FASTA-style
// header lines that are not stored as tags — confirm.
35 if len(data) > 0 && data[0] == '>' {
// Store a private copy: the slice returned by scanner.Bytes() may be
// overwritten by a later call to Scan.
37 seqs = append(seqs, append([]byte(nil), data...))
40 if err := scanner.Err(); err != nil {
43 return taglib.setTags(seqs)
// FindAll reads sequence data from in one byte at a time and calls fn for
// every tag in the library whose key matches the most recent keylen bases.
//
// fn receives the matching tag's id, a position computed as
// offset-keylen, and the length of the tag's stored sequence (which is
// checked to equal keylen before fn is called).
//
// If passthrough is non-nil, bytes are also written to it as they are
// processed.
//
// An error is returned for a '>' or ' ' byte encountered in the data, and
// if a stored tag's length differs from keylen.
46 func (taglib *tagLibrary) FindAll(in *bufio.Reader, passthrough io.Writer, fn func(id tagID, pos, taglen int)) error {
// Buffer of recently seen bases, with capacity for 1000 keys' worth of
// bytes so that compaction (below) is infrequent.
47 var window = make([]byte, 0, taglib.keylen*1000)
50 base, err := in.ReadByte()
53 } else if err != nil {
// On a line break, look at the next byte without consuming it.
// NOTE(review): a following '>' presumably marks the start of the next
// FASTA record — confirm how that case is handled.
55 } else if base == '\r' || base == '\n' {
56 if buf, err := in.Peek(1); err == nil && len(buf) > 0 && buf[0] == '>' {
58 } else if err == io.EOF {
// A '>' or space read directly from the stream at this point is
// reported as malformed FASTA data.
62 } else if base == '>' || base == ' ' {
63 return fmt.Errorf("unexpected char %q at offset %d in fasta data", base, offset)
// Echo the byte to passthrough when a writer was supplied.
66 if passthrough != nil {
67 if base >= 'A' && base <= 'Z' {
68 // lowercase for passthrough
71 _, err = passthrough.Write([]byte{base})
76 if !isbase[int(base)] {
77 // 'N' or various other chars meaning exact
// Append the base. When the buffer reaches capacity, keep only the most
// recent keylen bytes by moving them to the front and truncating.
83 window = append(window, base)
84 if len(window) == cap(window) {
85 copy(window, window[len(window)-taglib.keylen:])
86 window = window[:taglib.keylen]
// Shift the 2-bit code for this base into the rolling key and mask it
// with keymask, which setTags sets to (1 << (keylen*2)) - 1.
88 key = ((key << 2) | twobit[int(base)]) & taglib.keymask
// The map lookup is only reached once at least keylen bases are
// buffered.
90 if len(window) < taglib.keylen {
92 } else if taginfo, ok := taglib.tagmap[key]; !ok {
// Sanity check: every stored tag is expected to be exactly keylen long.
94 } else if len(taginfo.tagseq) != taglib.keylen {
95 return fmt.Errorf("assertion failed: len(%q) != keylen %d", taginfo.tagseq, taglib.keylen)
// Report the match. Emptying window afterwards means another keylen
// bases must be read before the next lookup.
97 fn(taginfo.id, offset-taglib.keylen, len(taginfo.tagseq))
98 window = window[:0] // don't try to match overlapping tags
// Len returns the number of entries in the library's tag map.
104 func (taglib *tagLibrary) Len() int {
105 return len(taglib.tagmap)
108 func (taglib *tagLibrary) TagLen() int {
// twobit is a 256-entry table indexed by byte value. FindAll and setTags
// use its entries as the value shifted into a key for each base.
// NOTE(review): Tags decodes keys with {'a','c','g','t'}, so the codes are
// presumably a=0, c=1, g=2, t=3 — confirm.
113 twobit = func() []tagmapKey {
114 r := make([]tagmapKey, 256)
// isbase is a 256-entry table indexed by byte value. FindAll takes a
// separate branch for bytes whose entry is false.
// NOTE(review): presumably true only for a/c/g/t in either case — confirm.
125 isbase = func() []bool {
126 r := make([]bool, 256)
// setTags replaces the library's contents with the given tags. Each tag
// is indexed by a key built from its first keylen bytes and stored with
// its 0-based position in tags as its id.
//
// It returns an error if two tags produce the same key.
139 func (taglib *tagLibrary) setTags(tags [][]byte) error {
// Start from the maximum key size.
// NOTE(review): keylen is presumably lowered to the length of the
// shortest tag by the loop below — confirm.
140 taglib.keylen = tagmapKeySize
141 for _, t := range tags {
142 if l := len(t); taglib.keylen > l {
// Mask covering the low keylen*2 bits.
// NOTE(review): with keylen == 32 the shift count is 64; this yields an
// all-ones mask only if tagmapKey is an unsigned 64-bit type, where the
// shift gives 0 and the subtraction wraps — confirm.
146 taglib.keymask = tagmapKey((1 << (taglib.keylen * 2)) - 1)
147 taglib.tagmap = map[tagmapKey]tagInfo{}
148 for i, tag := range tags {
// Pack the first keylen bytes of the tag, two bits per byte.
150 for _, b := range tag[:taglib.keylen] {
151 key = (key << 2) | twobit[int(b)]
// Keys must be unique, otherwise a lookup could not identify one tag.
153 if _, ok := taglib.tagmap[key]; ok {
154 return fmt.Errorf("first %d bytes of tag %d (%x) are not unique", taglib.keylen, i, key)
156 taglib.tagmap[key] = tagInfo{tagID(i), tag}
// Tags builds a slice with one entry per tag in the library, indexed by
// tag id. Each entry is keylen bytes long and is decoded from the tag's
// map key as lowercase a/c/g/t, rather than copied from the stored tag
// sequence.
161 func (taglib *tagLibrary) Tags() [][]byte {
162 out := make([][]byte, len(taglib.tagmap))
163 untwobit := []byte{'a', 'c', 'g', 't'}
164 for key, info := range taglib.tagmap {
165 seq := make([]byte, taglib.keylen)
// Fill seq from its last position backwards, using the low two bits of
// key for each base.
// NOTE(review): key is presumably shifted right by two bits on each
// iteration — confirm.
166 for i := len(seq) - 1; i >= 0; i-- {
167 seq[i] = untwobit[int(key)&3]
170 out[int(info.id)] = seq