Fix some tests.
[lightning.git] / slice_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "io/ioutil"
9         "os"
10         "os/exec"
11
12         "github.com/kshedden/gonpy"
13         "gopkg.in/check.v1"
14 )
15
16 type sliceSuite struct{}
17
18 var _ = check.Suite(&sliceSuite{})
19
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
21         tmpdir := c.MkDir()
22         err := os.Mkdir(tmpdir+"/lib1", 0777)
23         c.Assert(err, check.IsNil)
24         err = os.Mkdir(tmpdir+"/lib2", 0777)
25         c.Assert(err, check.IsNil)
26         err = os.Mkdir(tmpdir+"/lib3", 0777)
27         c.Assert(err, check.IsNil)
28         cwd, err := os.Getwd()
29         c.Assert(err, check.IsNil)
30         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31         c.Assert(err, check.IsNil)
32         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33         c.Assert(err, check.IsNil)
34
35         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36         c.Check(err, check.IsNil)
37
38         c.Log("=== import testdata/ref ===")
39         exited := (&importer{}).RunCommand("import", []string{
40                 "-local=true",
41                 "-tag-library", "testdata/tags",
42                 "-output-tiles",
43                 "-save-incomplete-tiles",
44                 "-o", tmpdir + "/lib1/library1.gob",
45                 "testdata/ref.fasta",
46         }, nil, os.Stderr, os.Stderr)
47         c.Assert(exited, check.Equals, 0)
48
49         c.Log("=== import testdata/pipeline1 ===")
50         exited = (&importer{}).RunCommand("import", []string{
51                 "-local=true",
52                 "-tag-library", "testdata/tags",
53                 "-output-tiles",
54                 "-o", tmpdir + "/lib2/library2.gob",
55                 tmpdir + "/pipeline1",
56         }, nil, os.Stderr, os.Stderr)
57         c.Assert(exited, check.Equals, 0)
58
59         c.Log("=== import pipeline1dup ===")
60         exited = (&importer{}).RunCommand("import", []string{
61                 "-local=true",
62                 "-tag-library", "testdata/tags",
63                 "-output-tiles",
64                 "-o", tmpdir + "/lib3/library3.gob",
65                 tmpdir + "/pipeline1dup",
66         }, nil, os.Stderr, os.Stderr)
67         c.Assert(exited, check.Equals, 0)
68
69         slicedir := c.MkDir()
70
71         c.Log("=== slice ===")
72         exited = (&slicecmd{}).RunCommand("slice", []string{
73                 "-local=true",
74                 "-output-dir=" + slicedir,
75                 "-tags-per-file=2",
76                 tmpdir + "/lib1",
77                 tmpdir + "/lib2",
78                 tmpdir + "/lib3",
79         }, nil, os.Stderr, os.Stderr)
80         c.Check(exited, check.Equals, 0)
81         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
82         c.Logf("%s", out)
83
84         c.Log("=== dump ===")
85         {
86                 dumpdir := c.MkDir()
87                 exited = (&dump{}).RunCommand("dump", []string{
88                         "-local=true",
89                         "-tags=4,6,7",
90                         "-input-dir=" + slicedir,
91                         "-output-dir=" + dumpdir,
92                 }, nil, os.Stderr, os.Stderr)
93                 c.Check(exited, check.Equals, 0)
94                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
95                 c.Logf("%s", out)
96                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97                 c.Assert(err, check.IsNil)
98                 c.Logf("%s", dumped)
99                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
100         }
101
102         c.Log("=== slice-numpy ===")
103         {
104                 npydir := c.MkDir()
105                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
106                         "-local=true",
107                         "-input-dir=" + slicedir,
108                         "-output-dir=" + npydir,
109                 }, nil, os.Stderr, os.Stderr)
110                 c.Check(exited, check.Equals, 0)
111                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
112                 c.Logf("%s", out)
113
114                 f, err := os.Open(npydir + "/matrix.0000.npy")
115                 c.Assert(err, check.IsNil)
116                 defer f.Close()
117                 npy, err := gonpy.NewReader(f)
118                 c.Assert(err, check.IsNil)
119                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120                 variants, err := npy.GetInt16()
121                 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
122
123                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124                 c.Assert(err, check.IsNil)
125                 c.Logf("%s", annotations)
126                 for _, s := range []string{
127                         "chr1:g.161A>T",
128                         "chr1:g.178A>T",
129                         "chr1:g.1_3delinsGGC",
130                         "chr1:g.222_224del",
131                 } {
132                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
133                 }
134
135                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136                 c.Assert(err, check.IsNil)
137                 c.Logf("%s", annotations)
138                 for _, s := range []string{
139                         ",2,chr2:g.1_3delinsAAA",
140                         ",2,chr2:g.125_127delinsAAA",
141                         ",4,chr2:g.125_127delinsAAA",
142                 } {
143                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
144                 }
145         }
146
147         c.Log("=== slice-numpy + regions ===")
148         {
149                 npydir := c.MkDir()
150                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
151                         "-local=true",
152                         "-regions=" + tmpdir + "/chr1-12-100.bed",
153                         "-input-dir=" + slicedir,
154                         "-output-dir=" + npydir,
155                         "-chunked-hgvs-matrix=true",
156                 }, nil, os.Stderr, os.Stderr)
157                 c.Check(exited, check.Equals, 0)
158                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
159                 c.Logf("%s", out)
160
161                 f, err := os.Open(npydir + "/matrix.0000.npy")
162                 c.Assert(err, check.IsNil)
163                 defer f.Close()
164                 npy, err := gonpy.NewReader(f)
165                 c.Assert(err, check.IsNil)
166                 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167                 variants, err := npy.GetInt16()
168                 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
169
170                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171                 c.Assert(err, check.IsNil)
172                 c.Logf("%s", annotations)
173                 for _, s := range []string{
174                         "chr1:g.161A>T",
175                         "chr1:g.178A>T",
176                         "chr1:g.1_3delinsGGC",
177                         "chr1:g.222_224del",
178                 } {
179                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
180                 }
181
182                 for _, fnm := range []string{
183                         npydir + "/matrix.0001.annotations.csv",
184                         npydir + "/matrix.0002.annotations.csv",
185                 } {
186                         annotations, err := ioutil.ReadFile(fnm)
187                         c.Assert(err, check.IsNil)
188                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
189                 }
190         }
191
192         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193         c.Check(err, check.IsNil)
194
195         c.Log("=== slice-numpy + regions + merge ===")
196         {
197                 npydir := c.MkDir()
198                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
199                         "-local=true",
200                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201                         "-input-dir=" + slicedir,
202                         "-output-dir=" + npydir,
203                         "-merge-output=true",
204                         "-single-hgvs-matrix=true",
205                 }, nil, os.Stderr, os.Stderr)
206                 c.Check(exited, check.Equals, 0)
207                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
208                 c.Logf("%s", out)
209
210                 f, err := os.Open(npydir + "/matrix.npy")
211                 c.Assert(err, check.IsNil)
212                 defer f.Close()
213                 npy, err := gonpy.NewReader(f)
214                 c.Assert(err, check.IsNil)
215                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216                 variants, err := npy.GetInt16()
217                 if c.Check(err, check.IsNil) {
218                         c.Check(variants, check.DeepEquals, []int16{
219                                 2, 1, 3, 1,
220                                 -1, -1, 4, 2,
221                                 2, 1, 3, 1,
222                                 -1, -1, 4, 2,
223                         })
224                 }
225
226                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227                 c.Assert(err, check.IsNil)
228                 c.Logf("%s", annotations)
229                 for _, s := range []string{
230                         "0,0,1,chr1:g.161A>T",
231                         "0,0,1,chr1:g.178A>T",
232                         "4,1,2,chr2:g.125_127delinsAAA",
233                 } {
234                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
235                 }
236         }
237
238         c.Log("=== slice-numpy + chunked hgvs matrix ===")
239         {
240                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
241 pipeline1/input1        1
242 pipeline1/input2        0
243 pipeline1dup/input1     1
244 pipeline1dup/input2     0
245 `), 0600)
246                 c.Assert(err, check.IsNil)
247
248                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
249                         "-local=true",
250                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
251                         "-case-control-column=CC",
252                         "-input-dir=" + slicedir,
253                         "-output-dir=" + tmpdir,
254                 }, nil, os.Stderr, os.Stderr)
255                 c.Check(exited, check.Equals, 0)
256
257                 npydir := c.MkDir()
258                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
259                         "-local=true",
260                         "-chunked-hgvs-matrix=true",
261                         "-samples=" + tmpdir + "/samples.csv",
262                         "-chi2-p-value=0.5",
263                         "-min-coverage=0.75",
264                         "-input-dir=" + slicedir,
265                         "-output-dir=" + npydir,
266                 }, nil, os.Stderr, os.Stderr)
267                 c.Check(exited, check.Equals, 0)
268                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
269                 c.Logf("%s", out)
270
271                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
272                 c.Assert(err, check.IsNil)
273                 c.Check(string(annotations), check.Equals, `0,chr2:g.1_3delinsAAA
274 1,chr2:g.125_127delinsAAA
275 2,chr2:g.241_245delinsAAAAA
276 3,chr2:g.291C>A
277 4,chr2:g.470_472del
278 5,chr2:g.471G>A
279 6,chr2:g.472G>A
280 `)
281         }
282
283         c.Log("=== slice-numpy + onehotChunked ===")
284         {
285                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
286 pipeline1/input1        1
287 pipeline1/input2        0
288 pipeline1dup/input1     1
289 pipeline1dup/input2     0
290 `), 0600)
291                 c.Assert(err, check.IsNil)
292
293                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
294                         "-local=true",
295                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
296                         "-case-control-column=CC",
297                         "-input-dir=" + slicedir,
298                         "-output-dir=" + tmpdir,
299                 }, nil, os.Stderr, os.Stderr)
300                 c.Check(exited, check.Equals, 0)
301
302                 npydir := c.MkDir()
303                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
304                         "-local=true",
305                         "-chunked-onehot=true",
306                         "-samples=" + tmpdir + "/samples.csv",
307                         "-chi2-p-value=0.5",
308                         "-min-coverage=0.75",
309                         "-input-dir=" + slicedir,
310                         "-output-dir=" + npydir,
311                 }, nil, os.Stderr, os.Stderr)
312                 c.Check(exited, check.Equals, 0)
313                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
314                 c.Logf("%s", out)
315
316                 f, err := os.Open(npydir + "/onehot.0002.npy")
317                 c.Assert(err, check.IsNil)
318                 defer f.Close()
319                 npy, err := gonpy.NewReader(f)
320                 c.Assert(err, check.IsNil)
321                 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
322                 onehot, err := npy.GetInt8()
323                 if c.Check(err, check.IsNil) {
324                         for r := 0; r < npy.Shape[0]; r++ {
325                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
326                         }
327                         c.Check(onehot, check.DeepEquals, []int8{
328                                 0, 1, 0, // input1
329                                 1, 0, 1, // input2
330                                 0, 1, 0, // dup/input1
331                                 1, 0, 1, // dup/input2
332                         })
333                 }
334         }
335
336         c.Log("=== slice-numpy + onehotSingle ===")
337         {
338                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
339 pipeline1/input1        1
340 pipeline1/input2        0
341 pipeline1dup/input1     1
342 pipeline1dup/input2     0
343 `), 0600)
344                 c.Assert(err, check.IsNil)
345
346                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
347                         "-local=true",
348                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
349                         "-case-control-column=CC",
350                         "-input-dir=" + slicedir,
351                         "-output-dir=" + tmpdir,
352                 }, nil, os.Stderr, os.Stderr)
353                 c.Check(exited, check.Equals, 0)
354
355                 npydir := c.MkDir()
356                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
357                         "-local=true",
358                         "-single-onehot=true",
359                         "-samples=" + tmpdir + "/samples.csv",
360                         "-chi2-p-value=0.5",
361                         "-min-coverage=0.75",
362                         "-input-dir=" + slicedir,
363                         "-output-dir=" + npydir,
364                         "-debug-tag=1",
365                 }, nil, os.Stderr, os.Stderr)
366                 c.Check(exited, check.Equals, 0)
367                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
368                 c.Logf("%s", out)
369
370                 f, err := os.Open(npydir + "/onehot.npy")
371                 c.Assert(err, check.IsNil)
372                 defer f.Close()
373                 npy, err := gonpy.NewReader(f)
374                 c.Assert(err, check.IsNil)
375                 c.Check(npy.Shape, check.DeepEquals, []int{2, 12})
376                 onehot, err := npy.GetUint32()
377                 if c.Check(err, check.IsNil) {
378                         for r := 0; r < npy.Shape[0]; r++ {
379                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
380                         }
381                         c.Check(onehot, check.DeepEquals, []uint32{
382                                 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
383                                 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
384                         })
385                 }
386
387                 f, err = os.Open(npydir + "/onehot-columns.npy")
388                 c.Assert(err, check.IsNil)
389                 defer f.Close()
390                 npy, err = gonpy.NewReader(f)
391                 c.Assert(err, check.IsNil)
392                 c.Check(npy.Shape, check.DeepEquals, []int{5, 6})
393                 onehotcols, err := npy.GetInt32()
394                 if c.Check(err, check.IsNil) {
395                         for r := 0; r < npy.Shape[0]; r++ {
396                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
397                         }
398                         c.Check(onehotcols, check.DeepEquals, []int32{
399                                 1, 4, 4, 4, 6, 6,
400                                 2, 2, 3, 4, 2, 3,
401                                 0, 0, 0, 0, 0, 0,
402                                 157299, 157299, 157299, 157299, 157299, 157299,
403                                 803273, 803273, 803273, 803273, 803273, 803273,
404                         })
405                 }
406         }
407 }
408
409 func (s *sliceSuite) TestSpanningTile(c *check.C) {
410         tmpdir := c.MkDir()
411         err := os.Mkdir(tmpdir+"/lib1", 0777)
412         c.Assert(err, check.IsNil)
413         err = os.Mkdir(tmpdir+"/lib2", 0777)
414         c.Assert(err, check.IsNil)
415         cwd, err := os.Getwd()
416         c.Assert(err, check.IsNil)
417         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
418         c.Assert(err, check.IsNil)
419         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
420         c.Assert(err, check.IsNil)
421
422         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
423         c.Check(err, check.IsNil)
424
425         c.Log("=== import testdata/ref ===")
426         exited := (&importer{}).RunCommand("import", []string{
427                 "-local=true",
428                 "-tag-library", "testdata/tags",
429                 "-output-tiles",
430                 "-save-incomplete-tiles",
431                 "-o", tmpdir + "/lib1/library1.gob",
432                 "testdata/ref.fasta",
433         }, nil, os.Stderr, os.Stderr)
434         c.Assert(exited, check.Equals, 0)
435
436         c.Log("=== import testdata/spanningtile ===")
437         exited = (&importer{}).RunCommand("import", []string{
438                 "-local=true",
439                 "-tag-library", "testdata/tags",
440                 "-output-tiles",
441                 "-o", tmpdir + "/lib2/library2.gob",
442                 cwd + "/testdata/spanningtile",
443         }, nil, os.Stderr, os.Stderr)
444         c.Assert(exited, check.Equals, 0)
445
446         slicedir := c.MkDir()
447
448         c.Log("=== slice ===")
449         exited = (&slicecmd{}).RunCommand("slice", []string{
450                 "-local=true",
451                 "-output-dir=" + slicedir,
452                 "-tags-per-file=2",
453                 tmpdir + "/lib1",
454                 tmpdir + "/lib2",
455         }, nil, os.Stderr, os.Stderr)
456         c.Check(exited, check.Equals, 0)
457         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
458         c.Logf("%s", out)
459
460         c.Log("=== dump ===")
461         {
462                 dumpdir := c.MkDir()
463                 exited = (&dump{}).RunCommand("dump", []string{
464                         "-local=true",
465                         "-tags=5,6",
466                         "-input-dir=" + slicedir,
467                         "-output-dir=" + dumpdir,
468                 }, nil, os.Stderr, os.Stderr)
469                 c.Check(exited, check.Equals, 0)
470                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
471                 c.Logf("%s", out)
472                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
473                 c.Assert(err, check.IsNil)
474                 c.Logf("%s", dumped)
475                 // spanning tile for tag 5 with A>G snp in tag 6
476                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n5,2,0,chr2,225,.*AAAACTGATCCGAAAAAAATACAA.*`)
477                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTGATCCAAAAAAAATACAA.*`)
478         }
479
480         c.Log("=== slice-numpy ===")
481         {
482                 npydir := c.MkDir()
483                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
484                         "-local=true",
485                         "-input-dir=" + slicedir,
486                         "-output-dir=" + npydir,
487                 }, nil, os.Stderr, os.Stderr)
488                 c.Check(exited, check.Equals, 0)
489                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
490                 c.Logf("%s", out)
491
492                 f, err := os.Open(npydir + "/matrix.0000.npy")
493                 c.Assert(err, check.IsNil)
494                 defer f.Close()
495                 npy, err := gonpy.NewReader(f)
496                 c.Assert(err, check.IsNil)
497                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
498                 variants, err := npy.GetInt16()
499                 c.Check(variants, check.DeepEquals, []int16{
500                         -1, -1, 1, 1,
501                         -1, -1, 1, 2,
502                 })
503
504                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
505                 c.Assert(err, check.IsNil)
506                 c.Logf("%s", annotations)
507
508                 f, err = os.Open(npydir + "/matrix.0002.npy")
509                 c.Assert(err, check.IsNil)
510                 defer f.Close()
511                 npy, err = gonpy.NewReader(f)
512                 c.Assert(err, check.IsNil)
513                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
514                 variants, err = npy.GetInt16()
515                 c.Check(variants, check.DeepEquals, []int16{
516                         1, 1, 2, 1,
517                         1, 1, 1, 1,
518                 })
519
520                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
521                 c.Assert(err, check.IsNil)
522                 c.Logf("%s", annotations)
523                 for _, s := range []string{
524                         "chr2:g\\.360A>G",
525                 } {
526                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
527                 }
528         }
529
530         c.Log("=== slice-numpy + regions ===")
531         {
532                 npydir := c.MkDir()
533                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
534                         "-local=true",
535                         "-regions=" + tmpdir + "/chr1-12-100.bed",
536                         "-input-dir=" + slicedir,
537                         "-output-dir=" + npydir,
538                         "-chunked-hgvs-matrix=true",
539                 }, nil, os.Stderr, os.Stderr)
540                 c.Check(exited, check.Equals, 0)
541                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
542                 c.Logf("%s", out)
543
544                 f, err := os.Open(npydir + "/matrix.0000.npy")
545                 c.Assert(err, check.IsNil)
546                 defer f.Close()
547                 npy, err := gonpy.NewReader(f)
548                 c.Assert(err, check.IsNil)
549                 c.Check(npy.Shape, check.DeepEquals, []int{2, 2})
550                 variants, err := npy.GetInt16()
551                 c.Check(variants, check.DeepEquals, []int16{-1, -1, -1, -1})
552
553                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
554                 c.Assert(err, check.IsNil)
555                 c.Check(string(annotations), check.Equals, "0,0,1,=,chr1,0,,,\n")
556
557                 for _, fnm := range []string{
558                         npydir + "/matrix.0001.annotations.csv",
559                         npydir + "/matrix.0002.annotations.csv",
560                 } {
561                         annotations, err := ioutil.ReadFile(fnm)
562                         c.Assert(err, check.IsNil)
563                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
564                 }
565         }
566
567         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
568         c.Check(err, check.IsNil)
569
570         c.Log("=== slice-numpy + regions + merge ===")
571         {
572                 npydir := c.MkDir()
573                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
574                         "-local=true",
575                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
576                         "-input-dir=" + slicedir,
577                         "-output-dir=" + npydir,
578                         "-merge-output=true",
579                         "-single-hgvs-matrix=true",
580                 }, nil, os.Stderr, os.Stderr)
581                 c.Check(exited, check.Equals, 0)
582                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
583                 c.Logf("%s", out)
584
585                 f, err := os.Open(npydir + "/matrix.npy")
586                 c.Assert(err, check.IsNil)
587                 defer f.Close()
588                 npy, err := gonpy.NewReader(f)
589                 c.Assert(err, check.IsNil)
590                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
591                 variants, err := npy.GetInt16()
592                 if c.Check(err, check.IsNil) {
593                         c.Check(variants, check.DeepEquals, []int16{
594                                 -1, -1, 1, 1,
595                                 -1, -1, 1, 1,
596                         })
597                 }
598
599                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
600                 c.Assert(err, check.IsNil)
601                 c.Check(string(annotations), check.Equals, "")
602         }
603
604         c.Log("=== slice-numpy + chunked hgvs matrix ===")
605         {
606                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
607 spanningtile/input1     1
608 `), 0600)
609                 c.Assert(err, check.IsNil)
610
611                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
612                         "-local=true",
613                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
614                         "-case-control-column=CC",
615                         "-input-dir=" + slicedir,
616                         "-output-dir=" + tmpdir,
617                 }, nil, os.Stderr, os.Stderr)
618                 c.Check(exited, check.Equals, 0)
619
620                 npydir := c.MkDir()
621                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
622                         "-local=true",
623                         "-chunked-hgvs-matrix=true",
624                         "-samples=" + tmpdir + "/samples.csv",
625                         "-chi2-p-value=1",
626                         "-min-coverage=0.75",
627                         "-input-dir=" + slicedir,
628                         "-output-dir=" + npydir,
629                 }, nil, os.Stderr, os.Stderr)
630                 c.Check(exited, check.Equals, 0)
631                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
632                 c.Logf("%s", out)
633
634                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
635                 c.Assert(err, check.IsNil)
636                 c.Check(string(annotations), check.Equals, `0,chr2:g.360A>G
637 `)
638         }
639
640         c.Log("=== slice-numpy + onehotChunked ===")
641         {
642                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
643 spanningtile/input1     1
644 `), 0600)
645                 c.Assert(err, check.IsNil)
646
647                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
648                         "-local=true",
649                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
650                         "-case-control-column=CC",
651                         "-input-dir=" + slicedir,
652                         "-output-dir=" + tmpdir,
653                 }, nil, os.Stderr, os.Stderr)
654                 c.Check(exited, check.Equals, 0)
655
656                 npydir := c.MkDir()
657                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
658                         "-local=true",
659                         "-chunked-onehot=true",
660                         "-samples=" + tmpdir + "/samples.csv",
661                         "-chi2-p-value=1",
662                         "-min-coverage=0.75",
663                         "-input-dir=" + slicedir,
664                         "-output-dir=" + npydir,
665                 }, nil, os.Stderr, os.Stderr)
666                 c.Check(exited, check.Equals, 0)
667                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
668                 c.Logf("%s", out)
669
670                 f, err := os.Open(npydir + "/onehot.0002.npy")
671                 c.Assert(err, check.IsNil)
672                 defer f.Close()
673                 npy, err := gonpy.NewReader(f)
674                 c.Assert(err, check.IsNil)
675                 c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
676                 onehot, err := npy.GetInt8()
677                 if c.Check(err, check.IsNil) {
678                         for r := 0; r < npy.Shape[0]; r++ {
679                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
680                         }
681                         c.Check(onehot, check.DeepEquals, []int8{
682                                 0, 1, // input1
683                         })
684                 }
685         }
686
687         c.Log("=== slice-numpy + onehotSingle ===")
688         {
689                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
690 spanningtile/input1     1
691 `), 0600)
692                 c.Assert(err, check.IsNil)
693
694                 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
695                         "-local=true",
696                         "-case-control-file=" + tmpdir + "/casecontrol.tsv",
697                         "-case-control-column=CC",
698                         "-input-dir=" + slicedir,
699                         "-output-dir=" + tmpdir,
700                 }, nil, os.Stderr, os.Stderr)
701                 c.Check(exited, check.Equals, 0)
702
703                 npydir := c.MkDir()
704                 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
705                         "-local=true",
706                         "-single-onehot=true",
707                         "-samples=" + tmpdir + "/samples.csv",
708                         "-chi2-p-value=1",
709                         "-min-coverage=0.75",
710                         "-input-dir=" + slicedir,
711                         "-output-dir=" + npydir,
712                         "-debug-tag=1",
713                 }, nil, os.Stderr, os.Stderr)
714                 c.Check(exited, check.Equals, 0)
715                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
716                 c.Logf("%s", out)
717
718                 f, err := os.Open(npydir + "/onehot.npy")
719                 c.Assert(err, check.IsNil)
720                 defer f.Close()
721                 npy, err := gonpy.NewReader(f)
722                 c.Assert(err, check.IsNil)
723                 c.Check(npy.Shape, check.DeepEquals, []int{2, 1})
724                 onehot, err := npy.GetUint32()
725                 if c.Check(err, check.IsNil) {
726                         for r := 0; r < npy.Shape[0]; r++ {
727                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
728                         }
729                         c.Check(onehot, check.DeepEquals, []uint32{0, 3})
730                 }
731
732                 f, err = os.Open(npydir + "/onehot-columns.npy")
733                 c.Assert(err, check.IsNil)
734                 defer f.Close()
735                 npy, err = gonpy.NewReader(f)
736                 c.Assert(err, check.IsNil)
737                 c.Check(npy.Shape, check.DeepEquals, []int{5, 4})
738                 onehotcols, err := npy.GetInt32()
739                 if c.Check(err, check.IsNil) {
740                         for r := 0; r < npy.Shape[0]; r++ {
741                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
742                         }
743                         c.Check(onehotcols, check.DeepEquals, []int32{
744                                 1, 1, 5, 5,
745                                 2, 2, 2, 2,
746                                 1, 0, 1, 0,
747                                 1000000, 1000000, 1000000, 1000000,
748                                 0, 0, 0, 0,
749                         })
750                 }
751         }
752 }
753
754 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
755         cmd := &sliceNumpy{
756                 cgnames:         []string{"sample1", "sample2", "sample3", "sample4"},
757                 chi2Cases:       []bool{false, true, true, false},
758                 chi2PValue:      .5,
759                 includeVariant1: true,
760                 minCoverage:     3,
761         }
762         cgs := map[string]CompactGenome{
763                 "sample1": CompactGenome{
764                         Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
765                 },
766                 "sample2": CompactGenome{
767                         Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
768                 },
769                 "sample3": CompactGenome{
770                         Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
771                 },
772                 "sample4": CompactGenome{
773                         Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
774                 },
775         }
776         maxv := tileVariantID(3)
777         remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
778         chunkstarttag := tagID(10)
779         fakevariant := TileVariant{Sequence: []byte("ACGT")}
780         seq := map[tagID][]TileVariant{}
781         for tag := tagID(10); tag < 12; tag++ {
782                 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
783                 c.Logf("=== tag %d", tag)
784                 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
785                 c.Logf("chunk len=%d", len(chunk))
786                 for _, x := range chunk {
787                         c.Logf("%+v", x)
788                 }
789                 c.Logf("xref len=%d", len(xref))
790                 for _, x := range xref {
791                         c.Logf("%+v", x)
792                 }
793                 out := onehotcols2int8(chunk)
794                 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
795                 for i := 0; i < len(out); i += len(chunk) {
796                         c.Logf("%+v", out[i:i+len(chunk)])
797                 }
798                 coords := onehotChunk2Indirect(chunk)
799                 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
800                 for _, x := range coords {
801                         c.Logf("%+v", x)
802                 }
803         }
804 }