// tileVariantID identifies one variant within a single tag's variant list.
// IDs are 1-based: getRef returns (index + 1) into tileLibrary.variant[tag].
12 type tileVariantID int32 // 1-based
// tileLibRef refers to one tile variant in a tileLibrary. getRef populates
// its tag and variant fields; the field declarations themselves are not
// part of this excerpt.
14 type tileLibRef struct {
// tileSeq maps a string key to a list of tile references; it is the result
// type of TileFasta.
// NOTE(review): presumably keyed by FASTA sequence label, with the value
// being that sequence's tile path — confirm against the lines omitted from
// this excerpt.
19 type tileSeq map[string][]tileLibRef
// tileLibrary accumulates the distinct tile variants seen so far.
// Its methods also use taglib and mtx fields, which are declared on lines
// not included in this excerpt.
21 type tileLibrary struct {
// variant[tag] lists the MD5 digests of the sequences recorded for that
// tag, in the order getRef first saw them; a tileVariantID is the 1-based
// index into that list.
23 variant [][][md5.Size]byte
// (disabled) would map each digest back to its full sequence.
25 // seq map[[md5.Size]byte][]byte
// TileFasta reads FASTA-formatted text from rdr, splits it into sequences
// at header ('>') or empty lines, and tiles each sequence by passing the
// stretches between tag positions to getRef, which adds unseen variants to
// the library. In the visible lines filelabel is used only in log output.
//
// NOTE(review): several lines of this function are absent from the excerpt
// under review, including the loop header, the branch boundaries, and the
// return statements. What is returned on success and on scanner error
// should be confirmed against the full file.
30 func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq, error) {
// flush tiles one accumulated sequence: it collects a tileLibRef for each
// stretch of fasta between consecutive callback positions.
33 flush := func(seqlabel string, fasta []byte) {
39 tilestart := -1 // position in fasta of tile that ends here
40 tiletagid := tagID(-1) // tag id starting tile that ends here
// The callback receives a tag id and a position that is used as an offset
// into fasta.
41 tilelib.taglib.FindAll(fasta, func(id tagID, pos int) {
// Close the previous tile: it spans from its own start up to pos.
43 path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:pos]))
// The last tile has no following tag, so it extends to the end of fasta.
49 path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:]))
52 log.Printf("%s %s tiled with path len %d", filelabel, seqlabel, len(path))
56 scanner := bufio.NewScanner(rdr)
// scanner.Bytes() is only valid until the next Scan; the uses below copy
// out of it (string conversion, bytes.ToLower).
58 buf := scanner.Bytes()
// An empty line or a '>' header line ends the sequence accumulated so far.
59 if len(buf) == 0 || buf[0] == '>' {
// Tile the finished sequence concurrently.
// NOTE(review): how these goroutines are awaited before the function
// returns is not visible in this excerpt — confirm.
61 go flush(seqlabel, fasta)
// The new label is the header line minus its first byte.
// NOTE(review): if this statement also runs for the len(buf) == 0 case of
// the condition above, buf[1:] panics (slice bounds out of range) on a
// blank input line — confirm against the full file and guard if so.
63 seqlabel = string(buf[1:])
// Sequence data is lower-cased before being accumulated; bytes.ToLower
// returns a copy, so fasta does not alias the scanner's buffer.
65 fasta = append(fasta, bytes.ToLower(buf)...)
68 if err := scanner.Err(); err != nil {
// Flush the final sequence, which has no header line after it.
72 go flush(seqlabel, fasta)
77 // Return a tileLibRef for a tile with the given tag and sequence,
78 // adding the sequence to the library if needed.
//
// A sequence is identified solely by its MD5 digest: only the digest is
// stored in tilelib.variant (the code that would retain the sequence bytes
// is commented out). The returned variant ID is the 1-based position of
// that digest in tilelib.variant[tag].
//
// A negative tag is not guarded against here and would index out of range.
79 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
// NOTE(review): the matching tilelib.mtx.Lock() is presumably on the line
// omitted just before this one — confirm.
81 defer tilelib.mtx.Unlock()
82 // if tilelib.seq == nil {
83 // tilelib.seq = map[[md5.Size]byte][]byte{}
// Grow the per-tag table so that tilelib.variant[tag] is addressable.
85 if len(tilelib.variant) <= int(tag) {
86 tilelib.variant = append(tilelib.variant, make([][][md5.Size]byte, int(tag)-len(tilelib.variant)+1)...)
88 seqhash := md5.Sum(seq)
// Linear search of this tag's known variants for a matching digest.
89 for i, varhash := range tilelib.variant[tag] {
90 if varhash == seqhash {
// Already known: variant IDs are 1-based, hence i + 1.
91 return tileLibRef{tag: tag, variant: tileVariantID(i + 1)}
// Not seen before: record the digest; its 1-based ID equals the new
// length of the list.
94 tilelib.variant[tag] = append(tilelib.variant[tag], seqhash)
95 // tilelib.seq[seqhash] = append([]byte(nil), seq...)
96 return tileLibRef{tag: tag, variant: tileVariantID(len(tilelib.variant[tag]))}