Log tile library size periodically.
[lightning.git] / tilelib.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "io"
7         "log"
8         "sync"
9
10         "golang.org/x/crypto/blake2b"
11 )
12
// tileVariantID identifies one variant of a tile among the variants
// recorded for the same tag. IDs are assigned by getRef in the order
// variants are first seen, starting at 1.
type tileVariantID int32 // 1-based
14
// tileLibRef refers to a single tile variant in a tileLibrary: the tag
// the tile was filed under, and the 1-based variant number within that
// tag as returned by getRef.
type tileLibRef struct {
	tag     tagID
	variant tileVariantID
}
19
// tileSeq maps a sequence label (the text following '>' on a FASTA
// header line, as read by TileFasta) to the ordered list of tile
// variants that sequence was tiled into.
type tileSeq map[string][]tileLibRef
21
// tileLibrary records the distinct tile variants seen so far for each
// tag in taglib.
//
// variant is allocated lazily by getRef with one entry per tag;
// variant[tag][i] holds the hash of the sequence that was assigned
// variant number i+1 for that tag. variants is the total number of
// hashes stored across all tags, and is what Len reports.
//
// mtx is held by Len and getRef while they read or modify variant and
// variants.
//
// The commented-out count and seq fields are currently disabled.
type tileLibrary struct {
	taglib  *tagLibrary
	variant [][][blake2b.Size]byte
	// count [][]int
	// seq map[[blake2b.Size]byte][]byte
	variants int

	mtx sync.Mutex
}
31
32 func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq, error) {
33         ret := tileSeq{}
34         type jobT struct {
35                 label string
36                 fasta []byte
37         }
38         todo := make(chan jobT)
39         scanner := bufio.NewScanner(rdr)
40         go func() {
41                 defer close(todo)
42                 var fasta []byte
43                 var seqlabel string
44                 for scanner.Scan() {
45                         buf := scanner.Bytes()
46                         if len(buf) == 0 || buf[0] == '>' {
47                                 todo <- jobT{seqlabel, fasta}
48                                 seqlabel, fasta = string(buf[1:]), nil
49                                 log.Printf("%s %s reading fasta", filelabel, seqlabel)
50                         } else {
51                                 fasta = append(fasta, bytes.ToLower(buf)...)
52                         }
53                 }
54                 todo <- jobT{seqlabel, fasta}
55         }()
56         for job := range todo {
57                 if len(job.fasta) == 0 {
58                         continue
59                 }
60                 log.Printf("%s %s tiling", filelabel, job.label)
61                 var path []tileLibRef
62                 tilestart := -1        // position in fasta of tile that ends here
63                 tiletagid := tagID(-1) // tag id starting tile that ends here
64                 tilelib.taglib.FindAll(job.fasta, func(id tagID, pos int) {
65                         if tilestart >= 0 {
66                                 path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos]))
67                         }
68                         tilestart = pos
69                         tiletagid = id
70                 })
71                 if tiletagid >= 0 {
72                         path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:]))
73                 }
74                 ret[job.label] = path
75                 log.Printf("%s %s tiled with path len %d", filelabel, job.label, len(path))
76         }
77         return ret, scanner.Err()
78 }
79
80 func (tilelib *tileLibrary) Len() int {
81         tilelib.mtx.Lock()
82         defer tilelib.mtx.Unlock()
83         return tilelib.variants
84 }
85
86 // Return a tileLibRef for a tile with the given tag and sequence,
87 // adding the sequence to the library if needed.
88 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
89         tilelib.mtx.Lock()
90         defer tilelib.mtx.Unlock()
91         // if tilelib.seq == nil {
92         //      tilelib.seq = map[[blake2b.Size]byte][]byte{}
93         // }
94         if tilelib.variant == nil {
95                 tilelib.variant = make([][][blake2b.Size]byte, tilelib.taglib.Len())
96         }
97         hash, err := blake2b.New(32, nil)
98         if err != nil {
99                 panic(err)
100         }
101         _, err = hash.Write(seq)
102         if err != nil {
103                 panic(err)
104         }
105         var seqhash [blake2b.Size]byte
106         copy(seqhash[:], hash.Sum(nil))
107         for i, varhash := range tilelib.variant[tag] {
108                 if varhash == seqhash {
109                         return tileLibRef{tag: tag, variant: tileVariantID(i + 1)}
110                 }
111         }
112         tilelib.variants++
113         tilelib.variant[tag] = append(tilelib.variant[tag], seqhash)
114         // tilelib.seq[seqhash] = append([]byte(nil), seq...)
115         return tileLibRef{tag: tag, variant: tileVariantID(len(tilelib.variant[tag]))}
116 }