Output to arvados storage instead of staging on local disk.
[lightning.git] / taglib_test.go
1 package main
2
3 import (
4         "bufio"
5         "fmt"
6         "io"
7         "math/rand"
8         "os"
9         "strings"
10         "testing"
11         "time"
12
13         "gopkg.in/check.v1"
14 )
15
16 func Test(t *testing.T) { check.TestingT(t) }
17
18 type taglibSuite struct{}
19
20 var _ = check.Suite(&taglibSuite{})
21
22 type tagMatch struct {
23         id     tagID
24         pos    int
25         taglen int
26 }
27
28 func (s *taglibSuite) TestFindAllTinyData(c *check.C) {
29         pr, pw, err := os.Pipe()
30         c.Assert(err, check.IsNil)
31         go func() {
32                 defer pw.Close()
33                 fmt.Fprintf(pw, `>0000.00
34 ggagaactgtgctccgccttcaga
35 acacatgctagcgcgtcggggtgg
36 gactctagcagagtggccagccac
37 `)
38         }()
39         var taglib tagLibrary
40         err = taglib.Load(pr)
41         c.Assert(err, check.IsNil)
42         haystack := []byte(`ggagaactgtgctccgccttcagaccccccccccccccccccccacacatgctagcgcgtcggggtgggggggggggggggggggggggggggactctagcagagtggccagccac`)
43         var matches []tagMatch
44         taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
45                 matches = append(matches, tagMatch{id, pos, taglen})
46         })
47         c.Check(matches, check.DeepEquals, []tagMatch{{0, 0, 24}, {1, 44, 24}, {2, 92, 24}})
48 }
49
50 func (s *taglibSuite) TestFindAllRealisticSize(c *check.C) {
51         start := time.Now()
52         acgt := []byte{'a', 'c', 'g', 't'}
53         haystack := make([]byte, 25000000) // ~1/2 smallest human chromosome
54         c.Logf("@%v haystack", time.Since(start))
55         rand.Read(haystack)
56         for i := range haystack {
57                 haystack[i] = acgt[int(haystack[i]&3)]
58         }
59
60         tagcount := 12500
61         tagsize := 24
62         var tags []string
63         pr, pw := io.Pipe()
64         go func() {
65                 defer pw.Close()
66                 w := bufio.NewWriter(pw)
67                 defer w.Flush()
68                 used := map[string]bool{}
69                 fmt.Fprint(w, ">000\n")
70                 for i := 0; len(tags) < tagcount; i += (len(haystack) - tagsize) / tagcount {
71                         i := i
72                         tag := haystack[i : i+tagsize]
73                         for used[string(tag)] {
74                                 i++
75                                 tag = haystack[i : i+tagsize]
76                         }
77                         used[string(tag)] = true
78                         tags = append(tags, strings.ToLower(string(tag)))
79                         w.Write(tag)
80                         w.Write([]byte{'\n'})
81                 }
82         }()
83         c.Logf("@%v build library", time.Since(start))
84         var taglib tagLibrary
85         err := taglib.Load(pr)
86         c.Assert(err, check.IsNil)
87         c.Logf("@%v find tags in input", time.Since(start))
88         var matches []tagMatch
89         taglib.FindAll(haystack, func(id tagID, pos, taglen int) {
90                 matches = append(matches, tagMatch{id, pos, taglen})
91         })
92         c.Logf("@%v done", time.Since(start))
93         c.Check(matches[0], check.Equals, tagMatch{0, 0, tagsize})
94         c.Check(matches[1].id, check.Equals, tagID(1))
95 }