19527: Option to exclude non-case/control samples.
[lightning.git] / slice_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "io/ioutil"
9         "os"
10         "os/exec"
11
12         "github.com/kshedden/gonpy"
13         "gopkg.in/check.v1"
14 )
15
16 type sliceSuite struct{}
17
18 var _ = check.Suite(&sliceSuite{})
19
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
21         tmpdir := c.MkDir()
22         err := os.Mkdir(tmpdir+"/lib1", 0777)
23         c.Assert(err, check.IsNil)
24         err = os.Mkdir(tmpdir+"/lib2", 0777)
25         c.Assert(err, check.IsNil)
26         err = os.Mkdir(tmpdir+"/lib3", 0777)
27         c.Assert(err, check.IsNil)
28         cwd, err := os.Getwd()
29         c.Assert(err, check.IsNil)
30         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31         c.Assert(err, check.IsNil)
32         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33         c.Assert(err, check.IsNil)
34
35         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36         c.Check(err, check.IsNil)
37
38         c.Log("=== import testdata/ref ===")
39         exited := (&importer{}).RunCommand("import", []string{
40                 "-local=true",
41                 "-tag-library", "testdata/tags",
42                 "-output-tiles",
43                 "-save-incomplete-tiles",
44                 "-o", tmpdir + "/lib1/library1.gob",
45                 "testdata/ref.fasta",
46         }, nil, os.Stderr, os.Stderr)
47         c.Assert(exited, check.Equals, 0)
48
49         c.Log("=== import testdata/pipeline1 ===")
50         exited = (&importer{}).RunCommand("import", []string{
51                 "-local=true",
52                 "-tag-library", "testdata/tags",
53                 "-output-tiles",
54                 "-o", tmpdir + "/lib2/library2.gob",
55                 tmpdir + "/pipeline1",
56         }, nil, os.Stderr, os.Stderr)
57         c.Assert(exited, check.Equals, 0)
58
59         c.Log("=== import pipeline1dup ===")
60         exited = (&importer{}).RunCommand("import", []string{
61                 "-local=true",
62                 "-tag-library", "testdata/tags",
63                 "-output-tiles",
64                 "-o", tmpdir + "/lib3/library3.gob",
65                 tmpdir + "/pipeline1dup",
66         }, nil, os.Stderr, os.Stderr)
67         c.Assert(exited, check.Equals, 0)
68
69         slicedir := c.MkDir()
70
71         c.Log("=== slice ===")
72         exited = (&slicecmd{}).RunCommand("slice", []string{
73                 "-local=true",
74                 "-output-dir=" + slicedir,
75                 "-tags-per-file=2",
76                 tmpdir + "/lib1",
77                 tmpdir + "/lib2",
78                 tmpdir + "/lib3",
79         }, nil, os.Stderr, os.Stderr)
80         c.Check(exited, check.Equals, 0)
81         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
82         c.Logf("%s", out)
83
84         c.Log("=== dump ===")
85         {
86                 dumpdir := c.MkDir()
87                 exited = (&dump{}).RunCommand("dump", []string{
88                         "-local=true",
89                         "-tags=4,6,7",
90                         "-input-dir=" + slicedir,
91                         "-output-dir=" + dumpdir,
92                 }, nil, os.Stderr, os.Stderr)
93                 c.Check(exited, check.Equals, 0)
94                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
95                 c.Logf("%s", out)
96                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97                 c.Assert(err, check.IsNil)
98                 c.Logf("%s", dumped)
99                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
100         }
101
102         c.Log("=== slice-numpy ===")
103         {
104                 npydir := c.MkDir()
105                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
106                         "-local=true",
107                         "-input-dir=" + slicedir,
108                         "-output-dir=" + npydir,
109                 }, nil, os.Stderr, os.Stderr)
110                 c.Check(exited, check.Equals, 0)
111                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
112                 c.Logf("%s", out)
113
114                 f, err := os.Open(npydir + "/matrix.0000.npy")
115                 c.Assert(err, check.IsNil)
116                 defer f.Close()
117                 npy, err := gonpy.NewReader(f)
118                 c.Assert(err, check.IsNil)
119                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120                 variants, err := npy.GetInt16()
121                 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
122
123                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124                 c.Assert(err, check.IsNil)
125                 c.Logf("%s", annotations)
126                 for _, s := range []string{
127                         "chr1:g.161A>T",
128                         "chr1:g.178A>T",
129                         "chr1:g.1_3delinsGGC",
130                         "chr1:g.222_224del",
131                 } {
132                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
133                 }
134
135                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136                 c.Assert(err, check.IsNil)
137                 c.Logf("%s", annotations)
138                 for _, s := range []string{
139                         ",2,chr2:g.1_3delinsAAA",
140                         ",2,chr2:g.125_127delinsAAA",
141                         ",4,chr2:g.125_127delinsAAA",
142                 } {
143                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
144                 }
145         }
146
147         c.Log("=== slice-numpy + regions ===")
148         {
149                 npydir := c.MkDir()
150                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
151                         "-local=true",
152                         "-regions=" + tmpdir + "/chr1-12-100.bed",
153                         "-input-dir=" + slicedir,
154                         "-output-dir=" + npydir,
155                         "-chunked-hgvs-matrix=true",
156                 }, nil, os.Stderr, os.Stderr)
157                 c.Check(exited, check.Equals, 0)
158                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
159                 c.Logf("%s", out)
160
161                 f, err := os.Open(npydir + "/matrix.0000.npy")
162                 c.Assert(err, check.IsNil)
163                 defer f.Close()
164                 npy, err := gonpy.NewReader(f)
165                 c.Assert(err, check.IsNil)
166                 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167                 variants, err := npy.GetInt16()
168                 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
169
170                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171                 c.Assert(err, check.IsNil)
172                 c.Logf("%s", annotations)
173                 for _, s := range []string{
174                         "chr1:g.161A>T",
175                         "chr1:g.178A>T",
176                         "chr1:g.1_3delinsGGC",
177                         "chr1:g.222_224del",
178                 } {
179                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
180                 }
181
182                 for _, fnm := range []string{
183                         npydir + "/matrix.0001.annotations.csv",
184                         npydir + "/matrix.0002.annotations.csv",
185                 } {
186                         annotations, err := ioutil.ReadFile(fnm)
187                         c.Assert(err, check.IsNil)
188                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
189                 }
190         }
191
192         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193         c.Check(err, check.IsNil)
194
195         c.Log("=== slice-numpy + regions + merge ===")
196         {
197                 npydir := c.MkDir()
198                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
199                         "-local=true",
200                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201                         "-input-dir=" + slicedir,
202                         "-output-dir=" + npydir,
203                         "-merge-output=true",
204                         "-single-hgvs-matrix=true",
205                 }, nil, os.Stderr, os.Stderr)
206                 c.Check(exited, check.Equals, 0)
207                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
208                 c.Logf("%s", out)
209
210                 f, err := os.Open(npydir + "/matrix.npy")
211                 c.Assert(err, check.IsNil)
212                 defer f.Close()
213                 npy, err := gonpy.NewReader(f)
214                 c.Assert(err, check.IsNil)
215                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216                 variants, err := npy.GetInt16()
217                 if c.Check(err, check.IsNil) {
218                         c.Check(variants, check.DeepEquals, []int16{
219                                 2, 1, 3, 1,
220                                 -1, -1, 4, 2,
221                                 2, 1, 3, 1,
222                                 -1, -1, 4, 2,
223                         })
224                 }
225
226                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227                 c.Assert(err, check.IsNil)
228                 c.Logf("%s", annotations)
229                 for _, s := range []string{
230                         "0,0,1,chr1:g.161A>T",
231                         "0,0,1,chr1:g.178A>T",
232                         "4,1,2,chr2:g.125_127delinsAAA",
233                 } {
234                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
235                 }
236         }
237
238         c.Log("=== slice-numpy + chunked hgvs matrix ===")
239         {
240                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
241 pipeline1/input1        1
242 pipeline1/input2        0
243 pipeline1dup/input1     1
244 pipeline1dup/input2     0
245 `), 0600)
246                 c.Assert(err, check.IsNil)
247                 npydir := c.MkDir()
248                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
249                         "-local=true",
250                         "-chunked-hgvs-matrix=true",
251                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252                         "-chi2-case-control-column=CC",
253                         "-chi2-p-value=0.5",
254                         "-min-coverage=0.75",
255                         "-input-dir=" + slicedir,
256                         "-output-dir=" + npydir,
257                 }, nil, os.Stderr, os.Stderr)
258                 c.Check(exited, check.Equals, 0)
259                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
260                 c.Logf("%s", out)
261
262                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263                 c.Assert(err, check.IsNil)
264                 c.Check(string(annotations), check.Equals, `0,chr2:g.1_3delinsAAA
265 1,chr2:g.125_127delinsAAA
266 2,chr2:g.241_245delinsAAAAA
267 3,chr2:g.291C>A
268 4,chr2:g.470_472del
269 5,chr2:g.471G>A
270 6,chr2:g.472G>A
271 `)
272         }
273
274         c.Log("=== slice-numpy + onehotChunked ===")
275         {
276                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
277 pipeline1/input1        1
278 pipeline1/input2        0
279 pipeline1dup/input1     1
280 pipeline1dup/input2     0
281 `), 0600)
282                 c.Assert(err, check.IsNil)
283                 npydir := c.MkDir()
284                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
285                         "-local=true",
286                         "-chunked-onehot=true",
287                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
288                         "-chi2-case-control-column=CC",
289                         "-chi2-p-value=0.5",
290                         "-min-coverage=0.75",
291                         "-input-dir=" + slicedir,
292                         "-output-dir=" + npydir,
293                 }, nil, os.Stderr, os.Stderr)
294                 c.Check(exited, check.Equals, 0)
295                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
296                 c.Logf("%s", out)
297
298                 f, err := os.Open(npydir + "/onehot.0002.npy")
299                 c.Assert(err, check.IsNil)
300                 defer f.Close()
301                 npy, err := gonpy.NewReader(f)
302                 c.Assert(err, check.IsNil)
303                 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
304                 onehot, err := npy.GetInt8()
305                 if c.Check(err, check.IsNil) {
306                         for r := 0; r < npy.Shape[0]; r++ {
307                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
308                         }
309                         c.Check(onehot, check.DeepEquals, []int8{
310                                 0, 1, 0, // input1
311                                 1, 0, 1, // input2
312                                 0, 1, 0, // dup/input1
313                                 1, 0, 1, // dup/input2
314                         })
315                 }
316         }
317
318         c.Log("=== slice-numpy + onehotSingle ===")
319         {
320                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
321 pipeline1/input1        1
322 pipeline1/input2        0
323 pipeline1dup/input1     1
324 pipeline1dup/input2     0
325 `), 0600)
326                 c.Assert(err, check.IsNil)
327                 npydir := c.MkDir()
328                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
329                         "-local=true",
330                         "-single-onehot=true",
331                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
332                         "-chi2-case-control-column=CC",
333                         "-chi2-p-value=0.5",
334                         "-min-coverage=0.75",
335                         "-input-dir=" + slicedir,
336                         "-output-dir=" + npydir,
337                         "-debug-tag=1",
338                 }, nil, os.Stderr, os.Stderr)
339                 c.Check(exited, check.Equals, 0)
340                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
341                 c.Logf("%s", out)
342
343                 f, err := os.Open(npydir + "/onehot.npy")
344                 c.Assert(err, check.IsNil)
345                 defer f.Close()
346                 npy, err := gonpy.NewReader(f)
347                 c.Assert(err, check.IsNil)
348                 c.Check(npy.Shape, check.DeepEquals, []int{2, 12})
349                 onehot, err := npy.GetUint32()
350                 if c.Check(err, check.IsNil) {
351                         for r := 0; r < npy.Shape[0]; r++ {
352                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
353                         }
354                         c.Check(onehot, check.DeepEquals, []uint32{
355                                 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
356                                 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
357                         })
358                 }
359
360                 f, err = os.Open(npydir + "/onehot-columns.npy")
361                 c.Assert(err, check.IsNil)
362                 defer f.Close()
363                 npy, err = gonpy.NewReader(f)
364                 c.Assert(err, check.IsNil)
365                 c.Check(npy.Shape, check.DeepEquals, []int{5, 6})
366                 onehotcols, err := npy.GetInt32()
367                 if c.Check(err, check.IsNil) {
368                         for r := 0; r < npy.Shape[0]; r++ {
369                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
370                         }
371                         c.Check(onehotcols, check.DeepEquals, []int32{
372                                 1, 4, 4, 4, 6, 6,
373                                 2, 2, 3, 4, 2, 3,
374                                 0, 0, 0, 0, 0, 0,
375                                 157299, 157299, 157299, 157299, 157299, 157299,
376                                 803273, 803273, 803273, 803273, 803273, 803273,
377                         })
378                 }
379         }
380 }
381
382 func (s *sliceSuite) TestSpanningTile(c *check.C) {
383         tmpdir := c.MkDir()
384         err := os.Mkdir(tmpdir+"/lib1", 0777)
385         c.Assert(err, check.IsNil)
386         err = os.Mkdir(tmpdir+"/lib2", 0777)
387         c.Assert(err, check.IsNil)
388         cwd, err := os.Getwd()
389         c.Assert(err, check.IsNil)
390         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
391         c.Assert(err, check.IsNil)
392         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
393         c.Assert(err, check.IsNil)
394
395         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
396         c.Check(err, check.IsNil)
397
398         c.Log("=== import testdata/ref ===")
399         exited := (&importer{}).RunCommand("import", []string{
400                 "-local=true",
401                 "-tag-library", "testdata/tags",
402                 "-output-tiles",
403                 "-save-incomplete-tiles",
404                 "-o", tmpdir + "/lib1/library1.gob",
405                 "testdata/ref.fasta",
406         }, nil, os.Stderr, os.Stderr)
407         c.Assert(exited, check.Equals, 0)
408
409         c.Log("=== import testdata/spanningtile ===")
410         exited = (&importer{}).RunCommand("import", []string{
411                 "-local=true",
412                 "-tag-library", "testdata/tags",
413                 "-output-tiles",
414                 "-o", tmpdir + "/lib2/library2.gob",
415                 cwd + "/testdata/spanningtile",
416         }, nil, os.Stderr, os.Stderr)
417         c.Assert(exited, check.Equals, 0)
418
419         slicedir := c.MkDir()
420
421         c.Log("=== slice ===")
422         exited = (&slicecmd{}).RunCommand("slice", []string{
423                 "-local=true",
424                 "-output-dir=" + slicedir,
425                 "-tags-per-file=2",
426                 tmpdir + "/lib1",
427                 tmpdir + "/lib2",
428         }, nil, os.Stderr, os.Stderr)
429         c.Check(exited, check.Equals, 0)
430         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
431         c.Logf("%s", out)
432
433         c.Log("=== dump ===")
434         {
435                 dumpdir := c.MkDir()
436                 exited = (&dump{}).RunCommand("dump", []string{
437                         "-local=true",
438                         "-tags=5,6",
439                         "-input-dir=" + slicedir,
440                         "-output-dir=" + dumpdir,
441                 }, nil, os.Stderr, os.Stderr)
442                 c.Check(exited, check.Equals, 0)
443                 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
444                 c.Logf("%s", out)
445                 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
446                 c.Assert(err, check.IsNil)
447                 c.Logf("%s", dumped)
448                 // spanning tile for tag 5 with A>G snp in tag 6
449                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n5,2,0,chr2,225,.*AAAACTGATCCGAAAAAAATACAA.*`)
450                 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTGATCCAAAAAAAATACAA.*`)
451         }
452
453         c.Log("=== slice-numpy ===")
454         {
455                 npydir := c.MkDir()
456                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
457                         "-local=true",
458                         "-input-dir=" + slicedir,
459                         "-output-dir=" + npydir,
460                 }, nil, os.Stderr, os.Stderr)
461                 c.Check(exited, check.Equals, 0)
462                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
463                 c.Logf("%s", out)
464
465                 f, err := os.Open(npydir + "/matrix.0000.npy")
466                 c.Assert(err, check.IsNil)
467                 defer f.Close()
468                 npy, err := gonpy.NewReader(f)
469                 c.Assert(err, check.IsNil)
470                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
471                 variants, err := npy.GetInt16()
472                 c.Check(variants, check.DeepEquals, []int16{
473                         -1, -1, 1, 1,
474                         -1, -1, 1, 2,
475                 })
476
477                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
478                 c.Assert(err, check.IsNil)
479                 c.Logf("%s", annotations)
480
481                 f, err = os.Open(npydir + "/matrix.0002.npy")
482                 c.Assert(err, check.IsNil)
483                 defer f.Close()
484                 npy, err = gonpy.NewReader(f)
485                 c.Assert(err, check.IsNil)
486                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
487                 variants, err = npy.GetInt16()
488                 c.Check(variants, check.DeepEquals, []int16{
489                         1, 1, 2, 1,
490                         1, 1, 1, 1,
491                 })
492
493                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
494                 c.Assert(err, check.IsNil)
495                 c.Logf("%s", annotations)
496                 for _, s := range []string{
497                         "chr2:g\\.360A>G",
498                 } {
499                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
500                 }
501         }
502
503         c.Log("=== slice-numpy + regions ===")
504         {
505                 npydir := c.MkDir()
506                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
507                         "-local=true",
508                         "-regions=" + tmpdir + "/chr1-12-100.bed",
509                         "-input-dir=" + slicedir,
510                         "-output-dir=" + npydir,
511                         "-chunked-hgvs-matrix=true",
512                 }, nil, os.Stderr, os.Stderr)
513                 c.Check(exited, check.Equals, 0)
514                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
515                 c.Logf("%s", out)
516
517                 f, err := os.Open(npydir + "/matrix.0000.npy")
518                 c.Assert(err, check.IsNil)
519                 defer f.Close()
520                 npy, err := gonpy.NewReader(f)
521                 c.Assert(err, check.IsNil)
522                 c.Check(npy.Shape, check.DeepEquals, []int{2, 2})
523                 variants, err := npy.GetInt16()
524                 c.Check(variants, check.DeepEquals, []int16{-1, -1, -1, -1})
525
526                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
527                 c.Assert(err, check.IsNil)
528                 c.Check(string(annotations), check.Equals, "0,0,1,=,chr1,0,,,\n")
529
530                 for _, fnm := range []string{
531                         npydir + "/matrix.0001.annotations.csv",
532                         npydir + "/matrix.0002.annotations.csv",
533                 } {
534                         annotations, err := ioutil.ReadFile(fnm)
535                         c.Assert(err, check.IsNil)
536                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
537                 }
538         }
539
540         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
541         c.Check(err, check.IsNil)
542
543         c.Log("=== slice-numpy + regions + merge ===")
544         {
545                 npydir := c.MkDir()
546                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
547                         "-local=true",
548                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
549                         "-input-dir=" + slicedir,
550                         "-output-dir=" + npydir,
551                         "-merge-output=true",
552                         "-single-hgvs-matrix=true",
553                 }, nil, os.Stderr, os.Stderr)
554                 c.Check(exited, check.Equals, 0)
555                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
556                 c.Logf("%s", out)
557
558                 f, err := os.Open(npydir + "/matrix.npy")
559                 c.Assert(err, check.IsNil)
560                 defer f.Close()
561                 npy, err := gonpy.NewReader(f)
562                 c.Assert(err, check.IsNil)
563                 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
564                 variants, err := npy.GetInt16()
565                 if c.Check(err, check.IsNil) {
566                         c.Check(variants, check.DeepEquals, []int16{
567                                 -1, -1, 1, 1,
568                                 -1, -1, 1, 1,
569                         })
570                 }
571
572                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
573                 c.Assert(err, check.IsNil)
574                 c.Check(string(annotations), check.Equals, "")
575         }
576
577         c.Log("=== slice-numpy + chunked hgvs matrix ===")
578         {
579                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
580 spanningtile/input1     1
581 `), 0600)
582                 c.Assert(err, check.IsNil)
583                 npydir := c.MkDir()
584                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
585                         "-local=true",
586                         "-chunked-hgvs-matrix=true",
587                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
588                         "-chi2-case-control-column=CC",
589                         "-chi2-p-value=1",
590                         "-min-coverage=0.75",
591                         "-input-dir=" + slicedir,
592                         "-output-dir=" + npydir,
593                 }, nil, os.Stderr, os.Stderr)
594                 c.Check(exited, check.Equals, 0)
595                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
596                 c.Logf("%s", out)
597
598                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
599                 c.Assert(err, check.IsNil)
600                 c.Check(string(annotations), check.Equals, `0,chr2:g.360A>G
601 `)
602         }
603
604         c.Log("=== slice-numpy + onehotChunked ===")
605         {
606                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
607 spanningtile/input1     1
608 `), 0600)
609                 c.Assert(err, check.IsNil)
610                 npydir := c.MkDir()
611                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
612                         "-local=true",
613                         "-chunked-onehot=true",
614                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
615                         "-chi2-case-control-column=CC",
616                         "-chi2-p-value=1",
617                         "-min-coverage=0.75",
618                         "-input-dir=" + slicedir,
619                         "-output-dir=" + npydir,
620                 }, nil, os.Stderr, os.Stderr)
621                 c.Check(exited, check.Equals, 0)
622                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
623                 c.Logf("%s", out)
624
625                 f, err := os.Open(npydir + "/onehot.0002.npy")
626                 c.Assert(err, check.IsNil)
627                 defer f.Close()
628                 npy, err := gonpy.NewReader(f)
629                 c.Assert(err, check.IsNil)
630                 c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
631                 onehot, err := npy.GetInt8()
632                 if c.Check(err, check.IsNil) {
633                         for r := 0; r < npy.Shape[0]; r++ {
634                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
635                         }
636                         c.Check(onehot, check.DeepEquals, []int8{
637                                 0, 1, // input1
638                         })
639                 }
640         }
641
642         c.Log("=== slice-numpy + onehotSingle ===")
643         {
644                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
645 spanningtile/input1     1
646 `), 0600)
647                 c.Assert(err, check.IsNil)
648                 npydir := c.MkDir()
649                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
650                         "-local=true",
651                         "-single-onehot=true",
652                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
653                         "-chi2-case-control-column=CC",
654                         "-chi2-p-value=1",
655                         "-min-coverage=0.75",
656                         "-input-dir=" + slicedir,
657                         "-output-dir=" + npydir,
658                         "-debug-tag=1",
659                 }, nil, os.Stderr, os.Stderr)
660                 c.Check(exited, check.Equals, 0)
661                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
662                 c.Logf("%s", out)
663
664                 f, err := os.Open(npydir + "/onehot.npy")
665                 c.Assert(err, check.IsNil)
666                 defer f.Close()
667                 npy, err := gonpy.NewReader(f)
668                 c.Assert(err, check.IsNil)
669                 c.Check(npy.Shape, check.DeepEquals, []int{2, 1})
670                 onehot, err := npy.GetUint32()
671                 if c.Check(err, check.IsNil) {
672                         for r := 0; r < npy.Shape[0]; r++ {
673                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
674                         }
675                         c.Check(onehot, check.DeepEquals, []uint32{0, 3})
676                 }
677
678                 f, err = os.Open(npydir + "/onehot-columns.npy")
679                 c.Assert(err, check.IsNil)
680                 defer f.Close()
681                 npy, err = gonpy.NewReader(f)
682                 c.Assert(err, check.IsNil)
683                 c.Check(npy.Shape, check.DeepEquals, []int{5, 4})
684                 onehotcols, err := npy.GetInt32()
685                 if c.Check(err, check.IsNil) {
686                         for r := 0; r < npy.Shape[0]; r++ {
687                                 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
688                         }
689                         c.Check(onehotcols, check.DeepEquals, []int32{
690                                 1, 1, 5, 5,
691                                 2, 2, 2, 2,
692                                 1, 0, 1, 0,
693                                 1000000, 1000000, 1000000, 1000000,
694                                 0, 0, 0, 0,
695                         })
696                 }
697         }
698 }
699
700 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
701         cmd := &sliceNumpy{
702                 cgnames:         []string{"sample1", "sample2", "sample3", "sample4"},
703                 chi2Cases:       []bool{false, true, true, false},
704                 chi2PValue:      .5,
705                 includeVariant1: true,
706                 minCoverage:     3,
707         }
708         cgs := map[string]CompactGenome{
709                 "sample1": CompactGenome{
710                         Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
711                 },
712                 "sample2": CompactGenome{
713                         Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
714                 },
715                 "sample3": CompactGenome{
716                         Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
717                 },
718                 "sample4": CompactGenome{
719                         Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
720                 },
721         }
722         maxv := tileVariantID(3)
723         remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
724         chunkstarttag := tagID(10)
725         fakevariant := TileVariant{Sequence: []byte("ACGT")}
726         seq := map[tagID][]TileVariant{}
727         for tag := tagID(10); tag < 12; tag++ {
728                 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
729                 c.Logf("=== tag %d", tag)
730                 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
731                 c.Logf("chunk len=%d", len(chunk))
732                 for _, x := range chunk {
733                         c.Logf("%+v", x)
734                 }
735                 c.Logf("xref len=%d", len(xref))
736                 for _, x := range xref {
737                         c.Logf("%+v", x)
738                 }
739                 out := onehotcols2int8(chunk)
740                 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
741                 for i := 0; i < len(out); i += len(chunk) {
742                         c.Logf("%+v", out[i:i+len(chunk)])
743                 }
744                 coords := onehotChunk2Indirect(chunk)
745                 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
746                 for _, x := range coords {
747                         c.Logf("%+v", x)
748                 }
749         }
750 }