Generate annotations for spanning tiles.
[lightning.git] / slice_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "io/ioutil"
9         "os"
10         "os/exec"
11
12         "github.com/kshedden/gonpy"
13         "gopkg.in/check.v1"
14 )
15
16 type sliceSuite struct{}
17
18 var _ = check.Suite(&sliceSuite{})
19
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
21         tmpdir := c.MkDir()
22         err := os.Mkdir(tmpdir+"/lib1", 0777)
23         c.Assert(err, check.IsNil)
24         err = os.Mkdir(tmpdir+"/lib2", 0777)
25         c.Assert(err, check.IsNil)
26         err = os.Mkdir(tmpdir+"/lib3", 0777)
27         c.Assert(err, check.IsNil)
28         cwd, err := os.Getwd()
29         c.Assert(err, check.IsNil)
30         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31         c.Assert(err, check.IsNil)
32         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33         c.Assert(err, check.IsNil)
34
35         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36         c.Check(err, check.IsNil)
37
38         c.Log("=== import testdata/ref ===")
39         exited := (&importer{}).RunCommand("import", []string{
40                 "-local=true",
41                 "-tag-library", "testdata/tags",
42                 "-output-tiles",
43                 "-save-incomplete-tiles",
44                 "-o", tmpdir + "/lib1/library1.gob",
45                 "testdata/ref.fasta",
46         }, nil, os.Stderr, os.Stderr)
47         c.Assert(exited, check.Equals, 0)
48
49         c.Log("=== import testdata/pipeline1 ===")
50         exited = (&importer{}).RunCommand("import", []string{
51                 "-local=true",
52                 "-tag-library", "testdata/tags",
53                 "-output-tiles",
54                 "-o", tmpdir + "/lib2/library2.gob",
55                 tmpdir + "/pipeline1",
56         }, nil, os.Stderr, os.Stderr)
57         c.Assert(exited, check.Equals, 0)
58
59         c.Log("=== import pipeline1dup ===")
60         exited = (&importer{}).RunCommand("import", []string{
61                 "-local=true",
62                 "-tag-library", "testdata/tags",
63                 "-output-tiles",
64                 "-o", tmpdir + "/lib3/library3.gob",
65                 tmpdir + "/pipeline1dup",
66         }, nil, os.Stderr, os.Stderr)
67         c.Assert(exited, check.Equals, 0)
68
69         slicedir := c.MkDir()
70
71         c.Log("=== slice ===")
72         exited = (&slicecmd{}).RunCommand("slice", []string{
73                 "-local=true",
74                 "-output-dir=" + slicedir,
75                 "-tags-per-file=2",
76                 tmpdir + "/lib1",
77                 tmpdir + "/lib2",
78                 tmpdir + "/lib3",
79         }, nil, os.Stderr, os.Stderr)
80         c.Check(exited, check.Equals, 0)
81         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
82         c.Logf("%s", out)
83
84         c.Log("=== dump ===")
85         {
86                 dumpdir := c.MkDir()
87                 exited = (&dump{}).RunCommand("dump", []string{
88                         "-local=true",
89                         "-tags=4,6,7",
90                         "-input-dir=" + slicedir,
91                         "-output-dir=" + dumpdir,
92                 }, nil, os.Stderr, os.Stderr)
93                 c.Check(exited, check.Equals, 0)
94                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
95                 c.Logf("%s", out)
96                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97                 c.Assert(err, check.IsNil)
98                 c.Logf("%s", dumped)
99                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
100         }
101
102         c.Log("=== slice-numpy ===")
103         {
104                 npydir := c.MkDir()
105                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
106                         "-local=true",
107                         "-input-dir=" + slicedir,
108                         "-output-dir=" + npydir,
109                 }, nil, os.Stderr, os.Stderr)
110                 c.Check(exited, check.Equals, 0)
111                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
112                 c.Logf("%s", out)
113
114                 f, err := os.Open(npydir + "/matrix.0000.npy")
115                 c.Assert(err, check.IsNil)
116                 defer f.Close()
117                 npy, err := gonpy.NewReader(f)
118                 c.Assert(err, check.IsNil)
119                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120                 variants, err := npy.GetInt16()
121                 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
122
123                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124                 c.Assert(err, check.IsNil)
125                 c.Logf("%s", annotations)
126                 for _, s := range []string{
127                         "chr1:g.161A>T",
128                         "chr1:g.178A>T",
129                         "chr1:g.1_3delinsGGC",
130                         "chr1:g.222_224del",
131                 } {
132                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
133                 }
134
135                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136                 c.Assert(err, check.IsNil)
137                 c.Logf("%s", annotations)
138                 for _, s := range []string{
139                         ",2,chr2:g.1_3delinsAAA",
140                         ",2,chr2:g.125_127delinsAAA",
141                         ",4,chr2:g.125_127delinsAAA",
142                 } {
143                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
144                 }
145         }
146
147         c.Log("=== slice-numpy + regions ===")
148         {
149                 npydir := c.MkDir()
150                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
151                         "-local=true",
152                         "-regions=" + tmpdir + "/chr1-12-100.bed",
153                         "-input-dir=" + slicedir,
154                         "-output-dir=" + npydir,
155                         "-chunked-hgvs-matrix=true",
156                 }, nil, os.Stderr, os.Stderr)
157                 c.Check(exited, check.Equals, 0)
158                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
159                 c.Logf("%s", out)
160
161                 f, err := os.Open(npydir + "/matrix.0000.npy")
162                 c.Assert(err, check.IsNil)
163                 defer f.Close()
164                 npy, err := gonpy.NewReader(f)
165                 c.Assert(err, check.IsNil)
166                 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167                 variants, err := npy.GetInt16()
168                 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
169
170                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171                 c.Assert(err, check.IsNil)
172                 c.Logf("%s", annotations)
173                 for _, s := range []string{
174                         "chr1:g.161A>T",
175                         "chr1:g.178A>T",
176                         "chr1:g.1_3delinsGGC",
177                         "chr1:g.222_224del",
178                 } {
179                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
180                 }
181
182                 for _, fnm := range []string{
183                         npydir + "/matrix.0001.annotations.csv",
184                         npydir + "/matrix.0002.annotations.csv",
185                 } {
186                         annotations, err := ioutil.ReadFile(fnm)
187                         c.Assert(err, check.IsNil)
188                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
189                 }
190         }
191
192         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193         c.Check(err, check.IsNil)
194
195         c.Log("=== slice-numpy + regions + merge ===")
196         {
197                 npydir := c.MkDir()
198                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
199                         "-local=true",
200                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201                         "-input-dir=" + slicedir,
202                         "-output-dir=" + npydir,
203                         "-merge-output=true",
204                         "-single-hgvs-matrix=true",
205                 }, nil, os.Stderr, os.Stderr)
206                 c.Check(exited, check.Equals, 0)
207                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
208                 c.Logf("%s", out)
209
210                 f, err := os.Open(npydir + "/matrix.npy")
211                 c.Assert(err, check.IsNil)
212                 defer f.Close()
213                 npy, err := gonpy.NewReader(f)
214                 c.Assert(err, check.IsNil)
215                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216                 variants, err := npy.GetInt16()
217                 if c.Check(err, check.IsNil) {
218                         c.Check(variants, check.DeepEquals, []int16{
219                                 2, 1, 3, 1,
220                                 -1, -1, 4, 2,
221                                 2, 1, 3, 1,
222                                 -1, -1, 4, 2,
223                         })
224                 }
225
226                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227                 c.Assert(err, check.IsNil)
228                 c.Logf("%s", annotations)
229                 for _, s := range []string{
230                         "0,0,1,chr1:g.161A>T",
231                         "0,0,1,chr1:g.178A>T",
232                         "4,1,2,chr2:g.125_127delinsAAA",
233                 } {
234                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
235                 }
236         }
237
238         c.Log("=== slice-numpy + chunked hgvs matrix ===")
239         {
240                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
241 pipeline1/input1        1
242 pipeline1/input2        0
243 pipeline1dup/input1     1
244 pipeline1dup/input2     0
245 `), 0600)
246                 c.Assert(err, check.IsNil)
247                 npydir := c.MkDir()
248                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
249                         "-local=true",
250                         "-chunked-hgvs-matrix=true",
251                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252                         "-chi2-case-control-column=CC",
253                         "-chi2-p-value=0.5",
254                         "-min-coverage=0.75",
255                         "-input-dir=" + slicedir,
256                         "-output-dir=" + npydir,
257                 }, nil, os.Stderr, os.Stderr)
258                 c.Check(exited, check.Equals, 0)
259                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
260                 c.Logf("%s", out)
261
262                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263                 c.Assert(err, check.IsNil)
264                 c.Check(string(annotations), check.Equals, `0,chr2:g.1_3delinsAAA
265 1,chr2:g.125_127delinsAAA
266 2,chr2:g.241_245delinsAAAAA
267 3,chr2:g.291C>A
268 4,chr2:g.470_472del
269 5,chr2:g.471G>A
270 6,chr2:g.472G>A
271 `)
272         }
273
274         c.Log("=== slice-numpy + onehotChunked ===")
275         {
276                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
277 pipeline1/input1        1
278 pipeline1/input2        0
279 pipeline1dup/input1     1
280 pipeline1dup/input2     0
281 `), 0600)
282                 c.Assert(err, check.IsNil)
283                 npydir := c.MkDir()
284                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
285                         "-local=true",
286                         "-chunked-onehot=true",
287                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
288                         "-chi2-case-control-column=CC",
289                         "-chi2-p-value=0.5",
290                         "-min-coverage=0.75",
291                         "-input-dir=" + slicedir,
292                         "-output-dir=" + npydir,
293                 }, nil, os.Stderr, os.Stderr)
294                 c.Check(exited, check.Equals, 0)
295                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
296                 c.Logf("%s", out)
297
298                 f, err := os.Open(npydir + "/onehot.0002.npy")
299                 c.Assert(err, check.IsNil)
300                 defer f.Close()
301                 npy, err := gonpy.NewReader(f)
302                 c.Assert(err, check.IsNil)
303                 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
304                 onehot, err := npy.GetInt8()
305                 if c.Check(err, check.IsNil) {
306                         for r := 0; r < npy.Shape[0]; r++ {
307                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
308                         }
309                         c.Check(onehot, check.DeepEquals, []int8{
310                                 0, 1, 0, // input1
311                                 1, 0, 1, // input2
312                                 0, 1, 0, // dup/input1
313                                 1, 0, 1, // dup/input2
314                         })
315                 }
316         }
317
318         c.Log("=== slice-numpy + onehotSingle ===")
319         {
320                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
321 pipeline1/input1        1
322 pipeline1/input2        0
323 pipeline1dup/input1     1
324 pipeline1dup/input2     0
325 `), 0600)
326                 c.Assert(err, check.IsNil)
327                 npydir := c.MkDir()
328                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
329                         "-local=true",
330                         "-single-onehot=true",
331                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
332                         "-chi2-case-control-column=CC",
333                         "-chi2-p-value=0.5",
334                         "-min-coverage=0.75",
335                         "-input-dir=" + slicedir,
336                         "-output-dir=" + npydir,
337                         "-debug-tag=1",
338                 }, nil, os.Stderr, os.Stderr)
339                 c.Check(exited, check.Equals, 0)
340                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
341                 c.Logf("%s", out)
342
343                 f, err := os.Open(npydir + "/onehot.npy")
344                 c.Assert(err, check.IsNil)
345                 defer f.Close()
346                 npy, err := gonpy.NewReader(f)
347                 c.Assert(err, check.IsNil)
348                 c.Check(npy.Shape, check.DeepEquals, []int{2, 12})
349                 onehot, err := npy.GetUint32()
350                 if c.Check(err, check.IsNil) {
351                         for r := 0; r < npy.Shape[0]; r++ {
352                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
353                         }
354                         c.Check(onehot, check.DeepEquals, []uint32{
355                                 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
356                                 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
357                         })
358                 }
359
360                 f, err = os.Open(npydir + "/onehot-columns.npy")
361                 c.Assert(err, check.IsNil)
362                 defer f.Close()
363                 npy, err = gonpy.NewReader(f)
364                 c.Assert(err, check.IsNil)
365                 c.Check(npy.Shape, check.DeepEquals, []int{5, 6})
366                 onehotcols, err := npy.GetInt32()
367                 if c.Check(err, check.IsNil) {
368                         for r := 0; r < npy.Shape[0]; r++ {
369                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
370                         }
371                         c.Check(onehotcols, check.DeepEquals, []int32{
372                                 1, 4, 4, 4, 6, 6,
373                                 2, 2, 3, 4, 2, 3,
374                                 0, 0, 0, 0, 0, 0,
375                                 157299, 157299, 157299, 157299, 157299, 157299,
376                                 803273, 803273, 803273, 803273, 803273, 803273,
377                         })
378                 }
379         }
380 }
381
382 func (s *sliceSuite) TestSpanningTile(c *check.C) {
383         tmpdir := c.MkDir()
384         err := os.Mkdir(tmpdir+"/lib1", 0777)
385         c.Assert(err, check.IsNil)
386         err = os.Mkdir(tmpdir+"/lib2", 0777)
387         c.Assert(err, check.IsNil)
388         cwd, err := os.Getwd()
389         c.Assert(err, check.IsNil)
390         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
391         c.Assert(err, check.IsNil)
392         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
393         c.Assert(err, check.IsNil)
394
395         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
396         c.Check(err, check.IsNil)
397
398         c.Log("=== import testdata/ref ===")
399         exited := (&importer{}).RunCommand("import", []string{
400                 "-local=true",
401                 "-tag-library", "testdata/tags",
402                 "-output-tiles",
403                 "-save-incomplete-tiles",
404                 "-o", tmpdir + "/lib1/library1.gob",
405                 "testdata/ref.fasta",
406         }, nil, os.Stderr, os.Stderr)
407         c.Assert(exited, check.Equals, 0)
408
409         c.Log("=== import testdata/spanningtile ===")
410         exited = (&importer{}).RunCommand("import", []string{
411                 "-local=true",
412                 "-tag-library", "testdata/tags",
413                 "-output-tiles",
414                 "-o", tmpdir + "/lib2/library2.gob",
415                 cwd + "/testdata/spanningtile",
416         }, nil, os.Stderr, os.Stderr)
417         c.Assert(exited, check.Equals, 0)
418
419         slicedir := c.MkDir()
420
421         c.Log("=== slice ===")
422         exited = (&slicecmd{}).RunCommand("slice", []string{
423                 "-local=true",
424                 "-output-dir=" + slicedir,
425                 "-tags-per-file=2",
426                 tmpdir + "/lib1",
427                 tmpdir + "/lib2",
428         }, nil, os.Stderr, os.Stderr)
429         c.Check(exited, check.Equals, 0)
430         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
431         c.Logf("%s", out)
432
433         c.Log("=== dump ===")
434         {
435                 dumpdir := c.MkDir()
436                 exited = (&dump{}).RunCommand("dump", []string{
437                         "-local=true",
438                         "-tags=5,6",
439                         "-input-dir=" + slicedir,
440                         "-output-dir=" + dumpdir,
441                 }, nil, os.Stderr, os.Stderr)
442                 c.Check(exited, check.Equals, 0)
443                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
444                 c.Logf("%s", out)
445                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
446                 c.Assert(err, check.IsNil)
447                 c.Logf("%s", dumped)
448                 // spanning tile for tag 5 with A>G snp in tag 6
449                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n5,1,0,chr2,225,.*AAAACTGATCCGAAAAAAATACAA.*`)
450                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTGATCCAAAAAAAATACAA.*`)
451         }
452
453         c.Log("=== slice-numpy ===")
454         {
455                 npydir := c.MkDir()
456                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
457                         "-local=true",
458                         "-input-dir=" + slicedir,
459                         "-output-dir=" + npydir,
460                 }, nil, os.Stderr, os.Stderr)
461                 c.Check(exited, check.Equals, 0)
462                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
463                 c.Logf("%s", out)
464
465                 f, err := os.Open(npydir + "/matrix.0000.npy")
466                 c.Assert(err, check.IsNil)
467                 defer f.Close()
468                 npy, err := gonpy.NewReader(f)
469                 c.Assert(err, check.IsNil)
470                 c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
471                 variants, err := npy.GetInt16()
472                 c.Check(variants, check.DeepEquals, []int16{-1, -1, 1, 1})
473
474                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
475                 c.Assert(err, check.IsNil)
476                 c.Logf("%s", annotations)
477
478                 f, err = os.Open(npydir + "/matrix.0002.npy")
479                 c.Assert(err, check.IsNil)
480                 defer f.Close()
481                 npy, err = gonpy.NewReader(f)
482                 c.Assert(err, check.IsNil)
483                 c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
484                 variants, err = npy.GetInt16()
485                 c.Check(variants, check.DeepEquals, []int16{1, 1, 1, 2})
486
487                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
488                 c.Assert(err, check.IsNil)
489                 c.Logf("%s", annotations)
490                 for _, s := range []string{
491                         "chr2:g\\.360A>G",
492                 } {
493                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
494                 }
495         }
496
497         c.Log("=== slice-numpy + regions ===")
498         {
499                 npydir := c.MkDir()
500                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
501                         "-local=true",
502                         "-regions=" + tmpdir + "/chr1-12-100.bed",
503                         "-input-dir=" + slicedir,
504                         "-output-dir=" + npydir,
505                         "-chunked-hgvs-matrix=true",
506                 }, nil, os.Stderr, os.Stderr)
507                 c.Check(exited, check.Equals, 0)
508                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
509                 c.Logf("%s", out)
510
511                 f, err := os.Open(npydir + "/matrix.0000.npy")
512                 c.Assert(err, check.IsNil)
513                 defer f.Close()
514                 npy, err := gonpy.NewReader(f)
515                 c.Assert(err, check.IsNil)
516                 c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
517                 variants, err := npy.GetInt16()
518                 c.Check(variants, check.DeepEquals, []int16{-1, -1})
519
520                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
521                 c.Assert(err, check.IsNil)
522                 c.Check(string(annotations), check.Equals, "0,0,1,=,chr1,0,,,\n")
523
524                 for _, fnm := range []string{
525                         npydir + "/matrix.0001.annotations.csv",
526                         npydir + "/matrix.0002.annotations.csv",
527                 } {
528                         annotations, err := ioutil.ReadFile(fnm)
529                         c.Assert(err, check.IsNil)
530                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
531                 }
532         }
533
534         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
535         c.Check(err, check.IsNil)
536
537         c.Log("=== slice-numpy + regions + merge ===")
538         {
539                 npydir := c.MkDir()
540                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
541                         "-local=true",
542                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
543                         "-input-dir=" + slicedir,
544                         "-output-dir=" + npydir,
545                         "-merge-output=true",
546                         "-single-hgvs-matrix=true",
547                 }, nil, os.Stderr, os.Stderr)
548                 c.Check(exited, check.Equals, 0)
549                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
550                 c.Logf("%s", out)
551
552                 f, err := os.Open(npydir + "/matrix.npy")
553                 c.Assert(err, check.IsNil)
554                 defer f.Close()
555                 npy, err := gonpy.NewReader(f)
556                 c.Assert(err, check.IsNil)
557                 c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
558                 variants, err := npy.GetInt16()
559                 if c.Check(err, check.IsNil) {
560                         c.Check(variants, check.DeepEquals, []int16{
561                                 -1, -1, 1, 1,
562                         })
563                 }
564
565                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
566                 c.Assert(err, check.IsNil)
567                 c.Check(string(annotations), check.Equals, "")
568         }
569
570         c.Log("=== slice-numpy + chunked hgvs matrix ===")
571         {
572                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
573 spanningtile/input1     1
574 `), 0600)
575                 c.Assert(err, check.IsNil)
576                 npydir := c.MkDir()
577                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
578                         "-local=true",
579                         "-chunked-hgvs-matrix=true",
580                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
581                         "-chi2-case-control-column=CC",
582                         "-chi2-p-value=1",
583                         "-min-coverage=0.75",
584                         "-input-dir=" + slicedir,
585                         "-output-dir=" + npydir,
586                 }, nil, os.Stderr, os.Stderr)
587                 c.Check(exited, check.Equals, 0)
588                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
589                 c.Logf("%s", out)
590
591                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
592                 c.Assert(err, check.IsNil)
593                 c.Check(string(annotations), check.Equals, `0,chr2:g.360A>G
594 `)
595         }
596
597         c.Log("=== slice-numpy + onehotChunked ===")
598         {
599                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
600 spanningtile/input1     1
601 `), 0600)
602                 c.Assert(err, check.IsNil)
603                 npydir := c.MkDir()
604                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
605                         "-local=true",
606                         "-chunked-onehot=true",
607                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
608                         "-chi2-case-control-column=CC",
609                         "-chi2-p-value=1",
610                         "-min-coverage=0.75",
611                         "-input-dir=" + slicedir,
612                         "-output-dir=" + npydir,
613                 }, nil, os.Stderr, os.Stderr)
614                 c.Check(exited, check.Equals, 0)
615                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
616                 c.Logf("%s", out)
617
618                 f, err := os.Open(npydir + "/onehot.0002.npy")
619                 c.Assert(err, check.IsNil)
620                 defer f.Close()
621                 npy, err := gonpy.NewReader(f)
622                 c.Assert(err, check.IsNil)
623                 c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
624                 onehot, err := npy.GetInt8()
625                 if c.Check(err, check.IsNil) {
626                         for r := 0; r < npy.Shape[0]; r++ {
627                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
628                         }
629                         c.Check(onehot, check.DeepEquals, []int8{
630                                 0, 1, // input1
631                         })
632                 }
633         }
634
635         c.Log("=== slice-numpy + onehotSingle ===")
636         {
637                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
638 spanningtile/input1     1
639 `), 0600)
640                 c.Assert(err, check.IsNil)
641                 npydir := c.MkDir()
642                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
643                         "-local=true",
644                         "-single-onehot=true",
645                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
646                         "-chi2-case-control-column=CC",
647                         "-chi2-p-value=1",
648                         "-min-coverage=0.75",
649                         "-input-dir=" + slicedir,
650                         "-output-dir=" + npydir,
651                         "-debug-tag=1",
652                 }, nil, os.Stderr, os.Stderr)
653                 c.Check(exited, check.Equals, 0)
654                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
655                 c.Logf("%s", out)
656
657                 f, err := os.Open(npydir + "/onehot.npy")
658                 c.Assert(err, check.IsNil)
659                 defer f.Close()
660                 npy, err := gonpy.NewReader(f)
661                 c.Assert(err, check.IsNil)
662                 c.Check(npy.Shape, check.DeepEquals, []int{2, 1})
663                 onehot, err := npy.GetUint32()
664                 if c.Check(err, check.IsNil) {
665                         for r := 0; r < npy.Shape[0]; r++ {
666                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
667                         }
668                         c.Check(onehot, check.DeepEquals, []uint32{
669                                 0, 1,
670                         })
671                 }
672
673                 f, err = os.Open(npydir + "/onehot-columns.npy")
674                 c.Assert(err, check.IsNil)
675                 defer f.Close()
676                 npy, err = gonpy.NewReader(f)
677                 c.Assert(err, check.IsNil)
678                 c.Check(npy.Shape, check.DeepEquals, []int{5, 2})
679                 onehotcols, err := npy.GetInt32()
680                 if c.Check(err, check.IsNil) {
681                         for r := 0; r < npy.Shape[0]; r++ {
682                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
683                         }
684                         c.Check(onehotcols, check.DeepEquals, []int32{
685                                 5, 5,
686                                 2, 2,
687                                 1, 0,
688                                 1000000, 1000000,
689                                 0, 0,
690                         })
691                 }
692         }
693 }
694
695 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
696         cmd := &sliceNumpy{
697                 cgnames:         []string{"sample1", "sample2", "sample3", "sample4"},
698                 chi2Cases:       []bool{false, true, true, false},
699                 chi2PValue:      .5,
700                 includeVariant1: true,
701                 minCoverage:     3,
702         }
703         cgs := map[string]CompactGenome{
704                 "sample1": CompactGenome{
705                         Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
706                 },
707                 "sample2": CompactGenome{
708                         Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
709                 },
710                 "sample3": CompactGenome{
711                         Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
712                 },
713                 "sample4": CompactGenome{
714                         Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
715                 },
716         }
717         maxv := tileVariantID(3)
718         remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
719         chunkstarttag := tagID(10)
720         fakevariant := TileVariant{Sequence: []byte("ACGT")}
721         seq := map[tagID][]TileVariant{}
722         for tag := tagID(10); tag < 12; tag++ {
723                 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
724                 c.Logf("=== tag %d", tag)
725                 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
726                 c.Logf("chunk len=%d", len(chunk))
727                 for _, x := range chunk {
728                         c.Logf("%+v", x)
729                 }
730                 c.Logf("xref len=%d", len(xref))
731                 for _, x := range xref {
732                         c.Logf("%+v", x)
733                 }
734                 out := onehotcols2int8(chunk)
735                 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
736                 for i := 0; i < len(out); i += len(chunk) {
737                         c.Logf("%+v", out[i:i+len(chunk)])
738                 }
739                 coords := onehotChunk2Indirect(chunk)
740                 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
741                 for _, x := range coords {
742                         c.Logf("%+v", x)
743                 }
744         }
745 }