Write hgvs-based numpy matrix.
[lightning.git] / exportnumpy_test.go
1 package lightning
2
3 import (
4         "bytes"
5         "io/ioutil"
6         "os"
7
8         "github.com/kshedden/gonpy"
9         "gopkg.in/check.v1"
10 )
11
12 type exportNumpySuite struct{}
13
14 var _ = check.Suite(&exportNumpySuite{})
15
16 func (s *exportNumpySuite) TestFastaToNumpy(c *check.C) {
17         tmpdir := c.MkDir()
18
19         err := ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
20         c.Check(err, check.IsNil)
21
22         var buffer bytes.Buffer
23         exited := (&importer{}).RunCommand("import", []string{"-local=true", "-tag-library", "testdata/tags", "-output-tiles", "-save-incomplete-tiles", "testdata/a.1.fasta", "testdata/tinyref.fasta"}, &bytes.Buffer{}, &buffer, os.Stderr)
24         c.Assert(exited, check.Equals, 0)
25         exited = (&exportNumpy{}).RunCommand("export-numpy", []string{"-local=true", "-output-dir", tmpdir, "-output-annotations", tmpdir + "/annotations.csv", "-regions", tmpdir + "/chr1-12-100.bed"}, &buffer, os.Stderr, os.Stderr)
26         c.Check(exited, check.Equals, 0)
27         f, err := os.Open(tmpdir + "/matrix.npy")
28         c.Assert(err, check.IsNil)
29         defer f.Close()
30         npy, err := gonpy.NewReader(f)
31         c.Assert(err, check.IsNil)
32         variants, err := npy.GetInt16()
33         c.Assert(err, check.IsNil)
34         c.Check(variants, check.HasLen, 6)
35         for i := 0; i < 4 && i < len(variants); i += 2 {
36                 if variants[i] == 1 {
37                         c.Check(variants[i+1], check.Equals, int16(2), check.Commentf("i=%d, v=%v", i, variants))
38                 } else {
39                         c.Check(variants[i], check.Equals, int16(2), check.Commentf("i=%d, v=%v", i, variants))
40                 }
41         }
42         for i := 4; i < 6 && i < len(variants); i += 2 {
43                 c.Check(variants[i], check.Equals, int16(1), check.Commentf("i=%d, v=%v", i, variants))
44         }
45         annotations, err := ioutil.ReadFile(tmpdir + "/annotations.csv")
46         c.Check(err, check.IsNil)
47         c.Logf("%s", string(annotations))
48         c.Check(string(annotations), check.Matches, `(?ms)(.*\n)?1,1,2,chr1:g.84_85insACTGCGATCTGA\n.*`)
49         c.Check(string(annotations), check.Matches, `(?ms)(.*\n)?1,1,1,chr1:g.87_96delinsGCATCTGCA\n.*`)
50 }
51
// sortUints treats variants as consecutive two-element pairs. On the
// first pair whose first element is greater than its second, it swaps
// the two elements of every pair in the slice and returns. If no pair
// is out of order, the slice is left untouched.
//
// The slice is modified in place. A trailing unpaired element (odd
// length input) is left as is.
func sortUints(variants []int16) {
	for i := 0; i+1 < len(variants); i += 2 {
		if variants[i] <= variants[i+1] {
			continue
		}
		// Step by two so each pair is swapped exactly once. Stepping by
		// one would cascade the swaps through the slice and index past
		// its end.
		for j := 0; j+1 < len(variants); j += 2 {
			variants[j], variants[j+1] = variants[j+1], variants[j]
		}
		return
	}
}
62
63 func (s *exportNumpySuite) TestOnehot(c *check.C) {
64         for _, trial := range []struct {
65                 incols  int
66                 in      []int16
67                 outcols int
68                 out     []int16
69         }{
70                 {2, []int16{1, 1, 1, 1}, 2, []int16{1, 1, 1, 1}},
71                 {2, []int16{1, 1, 1, 2}, 3, []int16{1, 1, 0, 1, 0, 1}},
72                 {
73                         // 2nd column => 3 one-hot columns
74                         // 4th column => 0 one-hot columns
75                         4, []int16{
76                                 1, 1, 0, 0,
77                                 1, 2, 1, 0,
78                                 1, 3, 0, 0,
79                         }, 5, []int16{
80                                 1, 1, 0, 0, 0,
81                                 1, 0, 1, 0, 1,
82                                 1, 0, 0, 1, 0,
83                         },
84                 },
85         } {
86                 out, _, outcols := recodeOnehot(trial.in, trial.incols)
87                 c.Check(out, check.DeepEquals, trial.out)
88                 c.Check(outcols, check.Equals, trial.outcols)
89         }
90 }