md5 -> blake2b
[lightning.git] / tilelib.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "io"
7         "log"
8         "sync"
9
10         "golang.org/x/crypto/blake2b"
11 )
12
13 type tileVariantID int32 // 1-based
14
15 type tileLibRef struct {
16         tag     tagID
17         variant tileVariantID
18 }
19
20 type tileSeq map[string][]tileLibRef
21
22 type tileLibrary struct {
23         taglib  *tagLibrary
24         variant [][][blake2b.Size]byte
25         // count [][]int
26         // seq map[[blake2b.Size]byte][]byte
27
28         mtx sync.Mutex
29 }
30
31 func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq, error) {
32         ret := tileSeq{}
33         type jobT struct {
34                 label string
35                 fasta []byte
36         }
37         todo := make(chan jobT)
38         scanner := bufio.NewScanner(rdr)
39         go func() {
40                 defer close(todo)
41                 var fasta []byte
42                 var seqlabel string
43                 for scanner.Scan() {
44                         buf := scanner.Bytes()
45                         if len(buf) == 0 || buf[0] == '>' {
46                                 todo <- jobT{seqlabel, fasta}
47                                 seqlabel, fasta = string(buf[1:]), nil
48                                 log.Printf("%s %s reading fasta", filelabel, seqlabel)
49                         } else {
50                                 fasta = append(fasta, bytes.ToLower(buf)...)
51                         }
52                 }
53                 todo <- jobT{seqlabel, fasta}
54         }()
55         for job := range todo {
56                 if len(job.fasta) == 0 {
57                         continue
58                 }
59                 log.Printf("%s %s tiling", filelabel, job.label)
60                 var path []tileLibRef
61                 tilestart := -1        // position in fasta of tile that ends here
62                 tiletagid := tagID(-1) // tag id starting tile that ends here
63                 tilelib.taglib.FindAll(job.fasta, func(id tagID, pos int) {
64                         if tilestart >= 0 {
65                                 path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:pos]))
66                         }
67                         tilestart = pos
68                         tiletagid = id
69                 })
70                 if tiletagid >= 0 {
71                         path = append(path, tilelib.getRef(tiletagid, job.fasta[tilestart:]))
72                 }
73                 ret[job.label] = path
74                 log.Printf("%s %s tiled with path len %d", filelabel, job.label, len(path))
75         }
76         return ret, scanner.Err()
77 }
78
79 // Return a tileLibRef for a tile with the given tag and sequence,
80 // adding the sequence to the library if needed.
81 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
82         tilelib.mtx.Lock()
83         defer tilelib.mtx.Unlock()
84         // if tilelib.seq == nil {
85         //      tilelib.seq = map[[blake2b.Size]byte][]byte{}
86         // }
87         if len(tilelib.variant) <= int(tag) {
88                 tilelib.variant = append(tilelib.variant, make([][][blake2b.Size]byte, int(tag)-len(tilelib.variant)+1)...)
89         }
90         hash, err := blake2b.New(32, nil)
91         if err != nil {
92                 panic(err)
93         }
94         _, err = hash.Write(seq)
95         if err != nil {
96                 panic(err)
97         }
98         var seqhash [blake2b.Size]byte
99         copy(seqhash[:], hash.Sum(nil))
100         for i, varhash := range tilelib.variant[tag] {
101                 if varhash == seqhash {
102                         return tileLibRef{tag: tag, variant: tileVariantID(i + 1)}
103                 }
104         }
105         tilelib.variant[tag] = append(tilelib.variant[tag], seqhash)
106         // tilelib.seq[seqhash] = append([]byte(nil), seq...)
107         return tileLibRef{tag: tag, variant: tileVariantID(len(tilelib.variant[tag]))}
108 }