Output tile variant arrays.
[lightning.git] / tilelib.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "crypto/md5"
7         "io"
8         "log"
9         "sync"
10 )
11
// tileVariantID numbers the variants observed for a single tag.
// IDs are 1-based: the first variant recorded for a tag has ID 1.
type tileVariantID int32
13
14 type tileLibRef struct {
15         tag     tagID
16         variant tileVariantID
17 }
18
19 type tileSeq map[string][]tileLibRef
20
21 type tileLibrary struct {
22         taglib  *tagLibrary
23         variant [][][md5.Size]byte
24         // count [][]int
25         // seq map[[md5.Size]byte][]byte
26
27         mtx sync.Mutex
28 }
29
30 func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader) (tileSeq, error) {
31         ret := tileSeq{}
32         var wg sync.WaitGroup
33         flush := func(seqlabel string, fasta []byte) {
34                 defer wg.Done()
35                 var path []tileLibRef
36                 if len(fasta) == 0 {
37                         return
38                 }
39                 tilestart := -1        // position in fasta of tile that ends here
40                 tiletagid := tagID(-1) // tag id starting tile that ends here
41                 tilelib.taglib.FindAll(fasta, func(id tagID, pos int) {
42                         if tilestart >= 0 {
43                                 path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:pos]))
44                         }
45                         tilestart = pos
46                         tiletagid = id
47                 })
48                 if tiletagid >= 0 {
49                         path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:]))
50                 }
51                 ret[seqlabel] = path
52                 log.Printf("%s %s tiled with path len %d", filelabel, seqlabel, len(path))
53         }
54         var fasta []byte
55         var seqlabel string
56         scanner := bufio.NewScanner(rdr)
57         for scanner.Scan() {
58                 buf := scanner.Bytes()
59                 if len(buf) == 0 || buf[0] == '>' {
60                         wg.Add(1)
61                         go flush(seqlabel, fasta)
62                         fasta = nil
63                         seqlabel = string(buf[1:])
64                 } else {
65                         fasta = append(fasta, bytes.ToLower(buf)...)
66                 }
67         }
68         if err := scanner.Err(); err != nil {
69                 return nil, err
70         }
71         wg.Add(1)
72         go flush(seqlabel, fasta)
73         wg.Wait()
74         return ret, nil
75 }
76
77 // Return a tileLibRef for a tile with the given tag and sequence,
78 // adding the sequence to the library if needed.
79 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
80         tilelib.mtx.Lock()
81         defer tilelib.mtx.Unlock()
82         // if tilelib.seq == nil {
83         //      tilelib.seq = map[[md5.Size]byte][]byte{}
84         // }
85         if len(tilelib.variant) <= int(tag) {
86                 tilelib.variant = append(tilelib.variant, make([][][md5.Size]byte, int(tag)-len(tilelib.variant)+1)...)
87         }
88         seqhash := md5.Sum(seq)
89         for i, varhash := range tilelib.variant[tag] {
90                 if varhash == seqhash {
91                         return tileLibRef{tag: tag, variant: tileVariantID(i + 1)}
92                 }
93         }
94         tilelib.variant[tag] = append(tilelib.variant[tag], seqhash)
95         // tilelib.seq[seqhash] = append([]byte(nil), seq...)
96         return tileLibRef{tag: tag, variant: tileVariantID(len(tilelib.variant[tag]))}
97 }