Fix -max-tag filter.
[lightning.git] / slice_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "io/ioutil"
9         "os"
10         "os/exec"
11
12         "github.com/kshedden/gonpy"
13         "gopkg.in/check.v1"
14 )
15
16 type sliceSuite struct{}
17
18 var _ = check.Suite(&sliceSuite{})
19
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
21         tmpdir := c.MkDir()
22         err := os.Mkdir(tmpdir+"/lib1", 0777)
23         c.Assert(err, check.IsNil)
24         err = os.Mkdir(tmpdir+"/lib2", 0777)
25         c.Assert(err, check.IsNil)
26         err = os.Mkdir(tmpdir+"/lib3", 0777)
27         c.Assert(err, check.IsNil)
28         cwd, err := os.Getwd()
29         c.Assert(err, check.IsNil)
30         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31         c.Assert(err, check.IsNil)
32         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33         c.Assert(err, check.IsNil)
34
35         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36         c.Check(err, check.IsNil)
37
38         c.Log("=== import testdata/ref ===")
39         exited := (&importer{}).RunCommand("import", []string{
40                 "-local=true",
41                 "-tag-library", "testdata/tags",
42                 "-output-tiles",
43                 "-save-incomplete-tiles",
44                 "-o", tmpdir + "/lib1/library1.gob",
45                 "testdata/ref.fasta",
46         }, nil, os.Stderr, os.Stderr)
47         c.Assert(exited, check.Equals, 0)
48
49         c.Log("=== import testdata/pipeline1 ===")
50         exited = (&importer{}).RunCommand("import", []string{
51                 "-local=true",
52                 "-tag-library", "testdata/tags",
53                 "-output-tiles",
54                 "-o", tmpdir + "/lib2/library2.gob",
55                 tmpdir + "/pipeline1",
56         }, nil, os.Stderr, os.Stderr)
57         c.Assert(exited, check.Equals, 0)
58
59         c.Log("=== import pipeline1dup ===")
60         exited = (&importer{}).RunCommand("import", []string{
61                 "-local=true",
62                 "-tag-library", "testdata/tags",
63                 "-output-tiles",
64                 "-o", tmpdir + "/lib3/library3.gob",
65                 tmpdir + "/pipeline1dup",
66         }, nil, os.Stderr, os.Stderr)
67         c.Assert(exited, check.Equals, 0)
68
69         slicedir := c.MkDir()
70
71         c.Log("=== slice ===")
72         exited = (&slicecmd{}).RunCommand("slice", []string{
73                 "-local=true",
74                 "-output-dir=" + slicedir,
75                 "-tags-per-file=2",
76                 tmpdir + "/lib1",
77                 tmpdir + "/lib2",
78                 tmpdir + "/lib3",
79         }, nil, os.Stderr, os.Stderr)
80         c.Check(exited, check.Equals, 0)
81         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
82         c.Logf("%s", out)
83
84         c.Log("=== dump ===")
85         {
86                 dumpdir := c.MkDir()
87                 exited = (&dump{}).RunCommand("dump", []string{
88                         "-local=true",
89                         "-tags=4,6,7",
90                         "-input-dir=" + slicedir,
91                         "-output-dir=" + dumpdir,
92                 }, nil, os.Stderr, os.Stderr)
93                 c.Check(exited, check.Equals, 0)
94                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
95                 c.Logf("%s", out)
96                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97                 c.Assert(err, check.IsNil)
98                 c.Logf("%s", dumped)
99                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
100         }
101
102         c.Log("=== slice-numpy ===")
103         {
104                 npydir := c.MkDir()
105                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
106                         "-local=true",
107                         "-input-dir=" + slicedir,
108                         "-output-dir=" + npydir,
109                 }, nil, os.Stderr, os.Stderr)
110                 c.Check(exited, check.Equals, 0)
111                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
112                 c.Logf("%s", out)
113
114                 f, err := os.Open(npydir + "/matrix.0000.npy")
115                 c.Assert(err, check.IsNil)
116                 defer f.Close()
117                 npy, err := gonpy.NewReader(f)
118                 c.Assert(err, check.IsNil)
119                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120                 variants, err := npy.GetInt16()
121                 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
122
123                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124                 c.Assert(err, check.IsNil)
125                 c.Logf("%s", annotations)
126                 for _, s := range []string{
127                         "chr1:g.161A>T",
128                         "chr1:g.178A>T",
129                         "chr1:g.1_3delinsGGC",
130                         "chr1:g.222_224del",
131                 } {
132                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
133                 }
134
135                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136                 c.Assert(err, check.IsNil)
137                 c.Logf("%s", annotations)
138                 for _, s := range []string{
139                         ",2,chr2:g.1_3delinsAAA",
140                         ",2,chr2:g.125_127delinsAAA",
141                         ",4,chr2:g.125_127delinsAAA",
142                 } {
143                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
144                 }
145         }
146
147         c.Log("=== slice-numpy + regions ===")
148         {
149                 npydir := c.MkDir()
150                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
151                         "-local=true",
152                         "-regions=" + tmpdir + "/chr1-12-100.bed",
153                         "-input-dir=" + slicedir,
154                         "-output-dir=" + npydir,
155                         "-chunked-hgvs-matrix=true",
156                 }, nil, os.Stderr, os.Stderr)
157                 c.Check(exited, check.Equals, 0)
158                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
159                 c.Logf("%s", out)
160
161                 f, err := os.Open(npydir + "/matrix.0000.npy")
162                 c.Assert(err, check.IsNil)
163                 defer f.Close()
164                 npy, err := gonpy.NewReader(f)
165                 c.Assert(err, check.IsNil)
166                 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167                 variants, err := npy.GetInt16()
168                 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
169
170                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171                 c.Assert(err, check.IsNil)
172                 c.Logf("%s", annotations)
173                 for _, s := range []string{
174                         "chr1:g.161A>T",
175                         "chr1:g.178A>T",
176                         "chr1:g.1_3delinsGGC",
177                         "chr1:g.222_224del",
178                 } {
179                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
180                 }
181
182                 for _, fnm := range []string{
183                         npydir + "/matrix.0001.annotations.csv",
184                         npydir + "/matrix.0002.annotations.csv",
185                 } {
186                         annotations, err := ioutil.ReadFile(fnm)
187                         c.Assert(err, check.IsNil)
188                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
189                 }
190         }
191
192         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193         c.Check(err, check.IsNil)
194
195         c.Log("=== slice-numpy + regions + merge ===")
196         {
197                 npydir := c.MkDir()
198                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
199                         "-local=true",
200                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201                         "-input-dir=" + slicedir,
202                         "-output-dir=" + npydir,
203                         "-merge-output=true",
204                         "-single-hgvs-matrix=true",
205                 }, nil, os.Stderr, os.Stderr)
206                 c.Check(exited, check.Equals, 0)
207                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
208                 c.Logf("%s", out)
209
210                 f, err := os.Open(npydir + "/matrix.npy")
211                 c.Assert(err, check.IsNil)
212                 defer f.Close()
213                 npy, err := gonpy.NewReader(f)
214                 c.Assert(err, check.IsNil)
215                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216                 variants, err := npy.GetInt16()
217                 if c.Check(err, check.IsNil) {
218                         c.Check(variants, check.DeepEquals, []int16{2, 1, 3, 1, -1, -1, 4, 2, 2, 1, 3, 1, -1, -1, 4, 2})
219                 }
220
221                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
222                 c.Assert(err, check.IsNil)
223                 c.Logf("%s", annotations)
224                 for _, s := range []string{
225                         "0,0,1,chr1:g.161A>T",
226                         "0,0,1,chr1:g.178A>T",
227                         "4,1,2,chr2:g.125_127delinsAAA",
228                 } {
229                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
230                 }
231         }
232
233         c.Log("=== slice-numpy + chunked hgvs matrix ===")
234         {
235                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
236 pipeline1/input1        1
237 pipeline1/input2        0
238 pipeline1dup/input1     1
239 pipeline1dup/input2     0
240 `), 0600)
241                 c.Assert(err, check.IsNil)
242                 npydir := c.MkDir()
243                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
244                         "-local=true",
245                         "-chunked-hgvs-matrix=true",
246                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
247                         "-chi2-case-control-column=CC",
248                         "-chi2-p-value=0.5",
249                         "-min-coverage=0.75",
250                         "-input-dir=" + slicedir,
251                         "-output-dir=" + npydir,
252                 }, nil, os.Stderr, os.Stderr)
253                 c.Check(exited, check.Equals, 0)
254                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
255                 c.Logf("%s", out)
256
257                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
258                 c.Assert(err, check.IsNil)
259                 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
260 1,chr2:g.471G>A
261 2,chr2:g.472G>A
262 `)
263         }
264
265         c.Log("=== slice-numpy + onehotChunked ===")
266         {
267                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
268 pipeline1/input1        1
269 pipeline1/input2        0
270 pipeline1dup/input1     1
271 pipeline1dup/input2     0
272 `), 0600)
273                 c.Assert(err, check.IsNil)
274                 npydir := c.MkDir()
275                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
276                         "-local=true",
277                         "-chunked-onehot=true",
278                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
279                         "-chi2-case-control-column=CC",
280                         "-chi2-p-value=0.5",
281                         "-min-coverage=0.75",
282                         "-input-dir=" + slicedir,
283                         "-output-dir=" + npydir,
284                 }, nil, os.Stderr, os.Stderr)
285                 c.Check(exited, check.Equals, 0)
286                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
287                 c.Logf("%s", out)
288
289                 f, err := os.Open(npydir + "/onehot.0002.npy")
290                 c.Assert(err, check.IsNil)
291                 defer f.Close()
292                 npy, err := gonpy.NewReader(f)
293                 c.Assert(err, check.IsNil)
294                 c.Check(npy.Shape, check.DeepEquals, []int{4, 6})
295                 onehot, err := npy.GetInt8()
296                 if c.Check(err, check.IsNil) {
297                         for r := 0; r < npy.Shape[0]; r++ {
298                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
299                         }
300                         c.Check(onehot, check.DeepEquals, []int8{
301                                 0, 0, 0, 1, 0, 0, // input1
302                                 0, 1, 0, 0, 0, 1, // input2
303                                 0, 0, 0, 1, 0, 0, // dup/input1
304                                 0, 1, 0, 0, 0, 1, // dup/input2
305                         })
306                 }
307         }
308
309         c.Log("=== slice-numpy + onehotSingle ===")
310         {
311                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
312 pipeline1/input1        1
313 pipeline1/input2        0
314 pipeline1dup/input1     1
315 pipeline1dup/input2     0
316 `), 0600)
317                 c.Assert(err, check.IsNil)
318                 npydir := c.MkDir()
319                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
320                         "-local=true",
321                         "-single-onehot=true",
322                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
323                         "-chi2-case-control-column=CC",
324                         "-chi2-p-value=0.5",
325                         "-min-coverage=0.75",
326                         "-input-dir=" + slicedir,
327                         "-output-dir=" + npydir,
328                 }, nil, os.Stderr, os.Stderr)
329                 c.Check(exited, check.Equals, 0)
330                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
331                 c.Logf("%s", out)
332
333                 f, err := os.Open(npydir + "/onehot.npy")
334                 c.Assert(err, check.IsNil)
335                 defer f.Close()
336                 npy, err := gonpy.NewReader(f)
337                 c.Assert(err, check.IsNil)
338                 c.Check(npy.Shape, check.DeepEquals, []int{2, 16})
339                 onehot, err := npy.GetUint32()
340                 if c.Check(err, check.IsNil) {
341                         for r := 0; r < npy.Shape[0]; r++ {
342                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
343                         }
344                         c.Check(onehot, check.DeepEquals, []uint32{
345                                 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
346                                 1, 1, 2, 2, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
347                         })
348                 }
349         }
350 }