Initial tiling code.
[lightning.git] / tilelib.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "crypto/md5"
7         "io"
8         "sync"
9 )
10
// tileVariantID numbers the distinct sequences ("variants") recorded
// for one tag. getRef assigns the numbers in order of first appearance,
// starting at 1.
11 type tileVariantID int32 // 1-based
12
// tileLibRef identifies one tile variant in a tileLibrary. tag is the
// id of the tag at which the tile starts, and variant is the 1-based
// variant number that getRef assigned to the tile's sequence under
// that tag.
13 type tileLibRef struct {
14         tag     tagID
15         variant tileVariantID
16 }
17
// tileSeq is the result type of TileFasta: it maps a sequence label
// (the text following '>' on a FASTA header line) to the tile variants
// found in that sequence, in the order they occur.
18 type tileSeq map[string][]tileLibRef
19
// tileLibrary collects the distinct tile sequences seen for each tag.
//
// taglib is the tag set TileFasta searches for (via FindAll) to find
// where tiles begin. variant is indexed by tag id; variant[tag][i] is
// the MD5 digest of the sequence of variant number i+1 for that tag.
// seq maps an MD5 digest to a copy of the sequence it was computed
// from; getRef creates the map on first use. mtx is held by getRef
// while it reads and updates variant and seq.
20 type tileLibrary struct {
21         taglib  *tagLibrary
22         variant [][][md5.Size]byte
23         // count [][]int
24         seq map[[md5.Size]byte][]byte
25
26         mtx sync.Mutex
27 }
28
29 func (tilelib *tileLibrary) TileFasta(rdr io.Reader) (tileSeq, error) {
30         ret := tileSeq{}
31         flush := func(label string, fasta []byte) {
32                 var path []tileLibRef
33                 if len(fasta) == 0 {
34                         return
35                 }
36                 tilestart := -1        // position in fasta of tile that ends here
37                 tiletagid := tagID(-1) // tag id starting tile that ends here
38                 tilelib.taglib.FindAll(fasta, func(id tagID, pos int) {
39                         if tilestart >= 0 {
40                                 path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:pos]))
41                                 // log.Printf("%q: tile %d is variant %d of tile %d", label, len(path), path[len(path)-1], id)
42                         }
43                         tilestart = pos
44                         tiletagid = id
45                 })
46                 if tiletagid >= 0 {
47                         path = append(path, tilelib.getRef(tiletagid, fasta[tilestart:]))
48                         // log.Printf("%q: tile %d is variant %d of tile %d", label, len(path), path[len(path)-1], tiletagid)
49                 }
50                 ret[label] = path
51         }
52         var fasta []byte
53         var label string
54         scanner := bufio.NewScanner(rdr)
55         for scanner.Scan() {
56                 buf := scanner.Bytes()
57                 if len(buf) == 0 || buf[0] == '>' {
58                         flush(label, fasta)
59                         fasta = nil
60                         label = string(buf[1:])
61                 } else {
62                         fasta = append(fasta, bytes.ToLower(buf)...)
63                 }
64         }
65         if err := scanner.Err(); err != nil {
66                 return nil, err
67         }
68         flush(label, fasta)
69         return ret, nil
70 }
71
72 // Return a tileLibRef for a tile with the given tag and sequence,
73 // adding the sequence to the library if needed.
74 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
75         tilelib.mtx.Lock()
76         defer tilelib.mtx.Unlock()
77         if tilelib.seq == nil {
78                 tilelib.seq = map[[md5.Size]byte][]byte{}
79         }
80         for len(tilelib.variant) <= int(tag) {
81                 tilelib.variant = append(tilelib.variant, nil)
82         }
83         seqhash := md5.Sum(seq)
84         for i, varhash := range tilelib.variant[tag] {
85                 if varhash == seqhash {
86                         return tileLibRef{tag: tag, variant: tileVariantID(i + 1)}
87                 }
88         }
89         tilelib.variant[tag] = append(tilelib.variant[tag], seqhash)
90         tilelib.seq[seqhash] = append([]byte(nil), seq...)
91         return tileLibRef{tag: tag, variant: tileVariantID(len(tilelib.variant[tag]))}
92 }