Merge branch '19527-training-set'
[lightning.git] / taglib_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "bufio"
9         "fmt"
10         "io"
11         "math/rand"
12         "os"
13         "strings"
14         "testing"
15         "time"
16
17         "gopkg.in/check.v1"
18 )
19
20 func Test(t *testing.T) { check.TestingT(t) }
21
22 type taglibSuite struct{}
23
24 var _ = check.Suite(&taglibSuite{})
25
26 type tagMatch struct {
27         id     tagID
28         pos    int
29         taglen int
30 }
31
32 func (s *taglibSuite) TestFindAllTinyData(c *check.C) {
33         pr, pw, err := os.Pipe()
34         c.Assert(err, check.IsNil)
35         go func() {
36                 defer pw.Close()
37                 fmt.Fprintf(pw, `>0000.00
38 ggagaactgtgctccgccttcaga
39 acacatgctagcgcgtcggggtgg
40 gactctagcagagtggccagccac
41 `)
42         }()
43         var taglib tagLibrary
44         err = taglib.Load(pr)
45         c.Assert(err, check.IsNil)
46         haystack := []byte(`ggagaactgtgctccgccttcagaccccccccccccccccccccacacatgctagcgcgtcggggtgggggggggggggggggggggggggggactctagcagagtggccagccac`)
47         var matches []tagMatch
48         taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
49                 matches = append(matches, tagMatch{id, pos, taglen})
50         })
51         c.Check(matches, check.DeepEquals, []tagMatch{{0, 0, 24}, {1, 44, 24}, {2, 92, 24}})
52 }
53
54 func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) {
55         start := time.Now()
56         acgt := []byte{'a', 'c', 'g', 't'}
57         haystack := make([]byte, 25000000) // ~1/2 smallest human chromosome
58         c.Logf("@%v haystack", time.Since(start))
59         rand.Read(haystack)
60         for i := range haystack {
61                 haystack[i] = acgt[int(haystack[i]&3)]
62         }
63
64         tagcount := 12500
65         tagsize := 24
66         var tags []string
67         pr, pw := io.Pipe()
68         go func() {
69                 defer pw.Close()
70                 w := bufio.NewWriter(pw)
71                 defer w.Flush()
72                 used := map[string]bool{}
73                 fmt.Fprint(w, ">000\n")
74                 for i := 0; len(tags) < tagcount; i += (len(haystack) - tagsize) / tagcount {
75                         i := i
76                         tag := haystack[i : i+tagsize]
77                         for used[string(tag)] {
78                                 i++
79                                 tag = haystack[i : i+tagsize]
80                         }
81                         used[string(tag)] = true
82                         tags = append(tags, strings.ToLower(string(tag)))
83                         w.Write(tag)
84                         w.Write([]byte{'\n'})
85                 }
86         }()
87         c.Logf("@%v build library", time.Since(start))
88         var taglib tagLibrary
89         err := taglib.Load(pr)
90         c.Assert(err, check.IsNil)
91         c.Logf("@%v find tags in input", time.Since(start))
92         var matches []tagMatch
93         taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
94                 matches = append(matches, tagMatch{id, pos, taglen})
95         })
96         c.Logf("@%v done", time.Since(start))
97         c.Check(matches[0], check.Equals, tagMatch{0, 0, tagsize})
98         c.Check(matches[1].id, check.Equals, tagID(1))
99 }