1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type sliceSuite struct{}
18 var _ = check.Suite(&sliceSuite{})
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
22 err := os.Mkdir(tmpdir+"/lib1", 0777)
23 c.Assert(err, check.IsNil)
24 err = os.Mkdir(tmpdir+"/lib2", 0777)
25 c.Assert(err, check.IsNil)
26 err = os.Mkdir(tmpdir+"/lib3", 0777)
27 c.Assert(err, check.IsNil)
28 cwd, err := os.Getwd()
29 c.Assert(err, check.IsNil)
30 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31 c.Assert(err, check.IsNil)
32 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33 c.Assert(err, check.IsNil)
35 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36 c.Check(err, check.IsNil)
38 c.Log("=== import testdata/ref ===")
39 exited := (&importer{}).RunCommand("import", []string{
41 "-tag-library", "testdata/tags",
43 "-save-incomplete-tiles",
44 "-o", tmpdir + "/lib1/library1.gob",
46 }, nil, os.Stderr, os.Stderr)
47 c.Assert(exited, check.Equals, 0)
49 c.Log("=== import testdata/pipeline1 ===")
50 exited = (&importer{}).RunCommand("import", []string{
52 "-tag-library", "testdata/tags",
54 "-o", tmpdir + "/lib2/library2.gob",
55 tmpdir + "/pipeline1",
56 }, nil, os.Stderr, os.Stderr)
57 c.Assert(exited, check.Equals, 0)
59 c.Log("=== import pipeline1dup ===")
60 exited = (&importer{}).RunCommand("import", []string{
62 "-tag-library", "testdata/tags",
64 "-o", tmpdir + "/lib3/library3.gob",
65 tmpdir + "/pipeline1dup",
66 }, nil, os.Stderr, os.Stderr)
67 c.Assert(exited, check.Equals, 0)
71 c.Log("=== slice ===")
72 exited = (&slicecmd{}).RunCommand("slice", []string{
74 "-output-dir=" + slicedir,
79 }, nil, os.Stderr, os.Stderr)
80 c.Check(exited, check.Equals, 0)
81 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
87 exited = (&dump{}).RunCommand("dump", []string{
90 "-input-dir=" + slicedir,
91 "-output-dir=" + dumpdir,
92 }, nil, os.Stderr, os.Stderr)
93 c.Check(exited, check.Equals, 0)
94 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
96 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97 c.Assert(err, check.IsNil)
99 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
102 c.Log("=== slice-numpy ===")
105 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
107 "-input-dir=" + slicedir,
108 "-output-dir=" + npydir,
109 }, nil, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
114 f, err := os.Open(npydir + "/matrix.0000.npy")
115 c.Assert(err, check.IsNil)
117 npy, err := gonpy.NewReader(f)
118 c.Assert(err, check.IsNil)
119 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120 variants, err := npy.GetInt16()
121 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
123 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124 c.Assert(err, check.IsNil)
125 c.Logf("%s", annotations)
126 for _, s := range []string{
129 "chr1:g.1_3delinsGGC",
132 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
135 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136 c.Assert(err, check.IsNil)
137 c.Logf("%s", annotations)
138 for _, s := range []string{
139 ",2,chr2:g.1_3delinsAAA",
140 ",2,chr2:g.125_127delinsAAA",
141 ",4,chr2:g.125_127delinsAAA",
143 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
147 c.Log("=== slice-numpy + regions ===")
150 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
152 "-regions=" + tmpdir + "/chr1-12-100.bed",
153 "-input-dir=" + slicedir,
154 "-output-dir=" + npydir,
155 "-chunked-hgvs-matrix=true",
156 }, nil, os.Stderr, os.Stderr)
157 c.Check(exited, check.Equals, 0)
158 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
161 f, err := os.Open(npydir + "/matrix.0000.npy")
162 c.Assert(err, check.IsNil)
164 npy, err := gonpy.NewReader(f)
165 c.Assert(err, check.IsNil)
166 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167 variants, err := npy.GetInt16()
168 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
170 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171 c.Assert(err, check.IsNil)
172 c.Logf("%s", annotations)
173 for _, s := range []string{
176 "chr1:g.1_3delinsGGC",
179 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
182 for _, fnm := range []string{
183 npydir + "/matrix.0001.annotations.csv",
184 npydir + "/matrix.0002.annotations.csv",
186 annotations, err := ioutil.ReadFile(fnm)
187 c.Assert(err, check.IsNil)
188 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
192 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193 c.Check(err, check.IsNil)
195 c.Log("=== slice-numpy + regions + merge ===")
198 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
200 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201 "-input-dir=" + slicedir,
202 "-output-dir=" + npydir,
203 "-merge-output=true",
204 "-single-hgvs-matrix=true",
205 }, nil, os.Stderr, os.Stderr)
206 c.Check(exited, check.Equals, 0)
207 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
210 f, err := os.Open(npydir + "/matrix.npy")
211 c.Assert(err, check.IsNil)
213 npy, err := gonpy.NewReader(f)
214 c.Assert(err, check.IsNil)
215 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216 variants, err := npy.GetInt16()
217 if c.Check(err, check.IsNil) {
218 c.Check(variants, check.DeepEquals, []int16{
226 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227 c.Assert(err, check.IsNil)
228 c.Logf("%s", annotations)
229 for _, s := range []string{
230 "0,0,1,chr1:g.161A>T",
231 "0,0,1,chr1:g.178A>T",
232 "4,1,2,chr2:g.125_127delinsAAA",
234 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
238 c.Log("=== slice-numpy + chunked hgvs matrix ===")
240 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
243 pipeline1dup/input1 1
244 pipeline1dup/input2 0
246 c.Assert(err, check.IsNil)
248 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
250 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
251 "-case-control-column=CC",
252 "-input-dir=" + slicedir,
253 "-output-dir=" + tmpdir,
254 }, nil, os.Stderr, os.Stderr)
255 c.Check(exited, check.Equals, 0)
258 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
260 "-chunked-hgvs-matrix=true",
261 "-samples=" + tmpdir + "/samples.csv",
263 "-min-coverage=0.75",
264 "-input-dir=" + slicedir,
265 "-output-dir=" + npydir,
266 }, nil, os.Stderr, os.Stderr)
267 c.Check(exited, check.Equals, 0)
268 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
271 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
272 c.Assert(err, check.IsNil)
273 c.Check(string(annotations), check.Equals, `0,chr2:g.1_3delinsAAA
274 1,chr2:g.125_127delinsAAA
275 2,chr2:g.241_245delinsAAAAA
283 c.Log("=== slice-numpy + onehotChunked ===")
285 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
288 pipeline1dup/input1 1
289 pipeline1dup/input2 0
291 c.Assert(err, check.IsNil)
293 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
295 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
296 "-case-control-column=CC",
297 "-input-dir=" + slicedir,
298 "-output-dir=" + tmpdir,
299 }, nil, os.Stderr, os.Stderr)
300 c.Check(exited, check.Equals, 0)
303 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
305 "-chunked-onehot=true",
306 "-samples=" + tmpdir + "/samples.csv",
308 "-min-coverage=0.75",
309 "-input-dir=" + slicedir,
310 "-output-dir=" + npydir,
311 }, nil, os.Stderr, os.Stderr)
312 c.Check(exited, check.Equals, 0)
313 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
316 f, err := os.Open(npydir + "/onehot.0002.npy")
317 c.Assert(err, check.IsNil)
319 npy, err := gonpy.NewReader(f)
320 c.Assert(err, check.IsNil)
321 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
322 onehot, err := npy.GetInt8()
323 if c.Check(err, check.IsNil) {
324 for r := 0; r < npy.Shape[0]; r++ {
325 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
327 c.Check(onehot, check.DeepEquals, []int8{
330 0, 1, 0, // dup/input1
331 1, 0, 1, // dup/input2
336 c.Log("=== slice-numpy + onehotSingle ===")
338 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
341 pipeline1dup/input1 1
342 pipeline1dup/input2 0
344 c.Assert(err, check.IsNil)
346 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
348 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
349 "-case-control-column=CC",
350 "-input-dir=" + slicedir,
351 "-output-dir=" + tmpdir,
352 }, nil, os.Stderr, os.Stderr)
353 c.Check(exited, check.Equals, 0)
356 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
358 "-single-onehot=true",
359 "-samples=" + tmpdir + "/samples.csv",
361 "-min-coverage=0.75",
362 "-input-dir=" + slicedir,
363 "-output-dir=" + npydir,
365 }, nil, os.Stderr, os.Stderr)
366 c.Check(exited, check.Equals, 0)
367 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
370 f, err := os.Open(npydir + "/onehot.npy")
371 c.Assert(err, check.IsNil)
373 npy, err := gonpy.NewReader(f)
374 c.Assert(err, check.IsNil)
375 c.Check(npy.Shape, check.DeepEquals, []int{2, 12})
376 onehot, err := npy.GetUint32()
377 if c.Check(err, check.IsNil) {
378 for r := 0; r < npy.Shape[0]; r++ {
379 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
381 c.Check(onehot, check.DeepEquals, []uint32{
382 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
383 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
387 f, err = os.Open(npydir + "/onehot-columns.npy")
388 c.Assert(err, check.IsNil)
390 npy, err = gonpy.NewReader(f)
391 c.Assert(err, check.IsNil)
392 c.Check(npy.Shape, check.DeepEquals, []int{5, 6})
393 onehotcols, err := npy.GetInt32()
394 if c.Check(err, check.IsNil) {
395 for r := 0; r < npy.Shape[0]; r++ {
396 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
398 c.Check(onehotcols, check.DeepEquals, []int32{
402 157299, 157299, 157299, 157299, 157299, 157299,
403 803273, 803273, 803273, 803273, 803273, 803273,
409 func (s *sliceSuite) TestSpanningTile(c *check.C) {
411 err := os.Mkdir(tmpdir+"/lib1", 0777)
412 c.Assert(err, check.IsNil)
413 err = os.Mkdir(tmpdir+"/lib2", 0777)
414 c.Assert(err, check.IsNil)
415 cwd, err := os.Getwd()
416 c.Assert(err, check.IsNil)
417 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
418 c.Assert(err, check.IsNil)
419 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
420 c.Assert(err, check.IsNil)
422 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
423 c.Check(err, check.IsNil)
425 c.Log("=== import testdata/ref ===")
426 exited := (&importer{}).RunCommand("import", []string{
428 "-tag-library", "testdata/tags",
430 "-save-incomplete-tiles",
431 "-o", tmpdir + "/lib1/library1.gob",
432 "testdata/ref.fasta",
433 }, nil, os.Stderr, os.Stderr)
434 c.Assert(exited, check.Equals, 0)
436 c.Log("=== import testdata/spanningtile ===")
437 exited = (&importer{}).RunCommand("import", []string{
439 "-tag-library", "testdata/tags",
441 "-o", tmpdir + "/lib2/library2.gob",
442 cwd + "/testdata/spanningtile",
443 }, nil, os.Stderr, os.Stderr)
444 c.Assert(exited, check.Equals, 0)
446 slicedir := c.MkDir()
448 c.Log("=== slice ===")
449 exited = (&slicecmd{}).RunCommand("slice", []string{
451 "-output-dir=" + slicedir,
455 }, nil, os.Stderr, os.Stderr)
456 c.Check(exited, check.Equals, 0)
457 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
460 c.Log("=== dump ===")
463 exited = (&dump{}).RunCommand("dump", []string{
466 "-input-dir=" + slicedir,
467 "-output-dir=" + dumpdir,
468 }, nil, os.Stderr, os.Stderr)
469 c.Check(exited, check.Equals, 0)
470 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
472 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
473 c.Assert(err, check.IsNil)
475 // spanning tile for tag 5 with A>G snp in tag 6
476 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n5,2,0,chr2,225,.*AAAACTGATCCGAAAAAAATACAA.*`)
477 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTGATCCAAAAAAAATACAA.*`)
480 c.Log("=== slice-numpy ===")
483 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
485 "-input-dir=" + slicedir,
486 "-output-dir=" + npydir,
487 }, nil, os.Stderr, os.Stderr)
488 c.Check(exited, check.Equals, 0)
489 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
492 f, err := os.Open(npydir + "/matrix.0000.npy")
493 c.Assert(err, check.IsNil)
495 npy, err := gonpy.NewReader(f)
496 c.Assert(err, check.IsNil)
497 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
498 variants, err := npy.GetInt16()
499 c.Check(variants, check.DeepEquals, []int16{
504 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
505 c.Assert(err, check.IsNil)
506 c.Logf("%s", annotations)
508 f, err = os.Open(npydir + "/matrix.0002.npy")
509 c.Assert(err, check.IsNil)
511 npy, err = gonpy.NewReader(f)
512 c.Assert(err, check.IsNil)
513 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
514 variants, err = npy.GetInt16()
515 c.Check(variants, check.DeepEquals, []int16{
520 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
521 c.Assert(err, check.IsNil)
522 c.Logf("%s", annotations)
523 for _, s := range []string{
526 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
530 c.Log("=== slice-numpy + regions ===")
533 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
535 "-regions=" + tmpdir + "/chr1-12-100.bed",
536 "-input-dir=" + slicedir,
537 "-output-dir=" + npydir,
538 "-chunked-hgvs-matrix=true",
539 }, nil, os.Stderr, os.Stderr)
540 c.Check(exited, check.Equals, 0)
541 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
544 f, err := os.Open(npydir + "/matrix.0000.npy")
545 c.Assert(err, check.IsNil)
547 npy, err := gonpy.NewReader(f)
548 c.Assert(err, check.IsNil)
549 c.Check(npy.Shape, check.DeepEquals, []int{2, 2})
550 variants, err := npy.GetInt16()
551 c.Check(variants, check.DeepEquals, []int16{-1, -1, -1, -1})
553 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
554 c.Assert(err, check.IsNil)
555 c.Check(string(annotations), check.Equals, "0,0,1,=,chr1,0,,,\n")
557 for _, fnm := range []string{
558 npydir + "/matrix.0001.annotations.csv",
559 npydir + "/matrix.0002.annotations.csv",
561 annotations, err := ioutil.ReadFile(fnm)
562 c.Assert(err, check.IsNil)
563 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
567 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
568 c.Check(err, check.IsNil)
570 c.Log("=== slice-numpy + regions + merge ===")
573 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
575 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
576 "-input-dir=" + slicedir,
577 "-output-dir=" + npydir,
578 "-merge-output=true",
579 "-single-hgvs-matrix=true",
580 }, nil, os.Stderr, os.Stderr)
581 c.Check(exited, check.Equals, 0)
582 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
585 f, err := os.Open(npydir + "/matrix.npy")
586 c.Assert(err, check.IsNil)
588 npy, err := gonpy.NewReader(f)
589 c.Assert(err, check.IsNil)
590 c.Check(npy.Shape, check.DeepEquals, []int{2, 4})
591 variants, err := npy.GetInt16()
592 if c.Check(err, check.IsNil) {
593 c.Check(variants, check.DeepEquals, []int16{
599 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
600 c.Assert(err, check.IsNil)
601 c.Check(string(annotations), check.Equals, "")
604 c.Log("=== slice-numpy + chunked hgvs matrix ===")
606 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
607 spanningtile/input1 1
609 c.Assert(err, check.IsNil)
611 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
613 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
614 "-case-control-column=CC",
615 "-input-dir=" + slicedir,
616 "-output-dir=" + tmpdir,
617 }, nil, os.Stderr, os.Stderr)
618 c.Check(exited, check.Equals, 0)
621 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
623 "-chunked-hgvs-matrix=true",
624 "-samples=" + tmpdir + "/samples.csv",
626 "-min-coverage=0.75",
627 "-input-dir=" + slicedir,
628 "-output-dir=" + npydir,
629 }, nil, os.Stderr, os.Stderr)
630 c.Check(exited, check.Equals, 0)
631 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
634 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
635 c.Assert(err, check.IsNil)
636 c.Check(string(annotations), check.Equals, `0,chr2:g.360A>G
640 c.Log("=== slice-numpy + onehotChunked ===")
642 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
643 spanningtile/input1 1
645 c.Assert(err, check.IsNil)
647 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
649 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
650 "-case-control-column=CC",
651 "-input-dir=" + slicedir,
652 "-output-dir=" + tmpdir,
653 }, nil, os.Stderr, os.Stderr)
654 c.Check(exited, check.Equals, 0)
657 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
659 "-chunked-onehot=true",
660 "-samples=" + tmpdir + "/samples.csv",
662 "-min-coverage=0.75",
663 "-input-dir=" + slicedir,
664 "-output-dir=" + npydir,
665 }, nil, os.Stderr, os.Stderr)
666 c.Check(exited, check.Equals, 0)
667 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
670 f, err := os.Open(npydir + "/onehot.0002.npy")
671 c.Assert(err, check.IsNil)
673 npy, err := gonpy.NewReader(f)
674 c.Assert(err, check.IsNil)
675 c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
676 onehot, err := npy.GetInt8()
677 if c.Check(err, check.IsNil) {
678 for r := 0; r < npy.Shape[0]; r++ {
679 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
681 c.Check(onehot, check.DeepEquals, []int8{
687 c.Log("=== slice-numpy + onehotSingle ===")
689 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
690 spanningtile/input1 1
692 c.Assert(err, check.IsNil)
694 exited := (&chooseSamples{}).RunCommand("choose-samples", []string{
696 "-case-control-file=" + tmpdir + "/casecontrol.tsv",
697 "-case-control-column=CC",
698 "-input-dir=" + slicedir,
699 "-output-dir=" + tmpdir,
700 }, nil, os.Stderr, os.Stderr)
701 c.Check(exited, check.Equals, 0)
704 exited = (&sliceNumpy{}).RunCommand("slice-numpy", []string{
706 "-single-onehot=true",
707 "-samples=" + tmpdir + "/samples.csv",
709 "-min-coverage=0.75",
710 "-input-dir=" + slicedir,
711 "-output-dir=" + npydir,
713 }, nil, os.Stderr, os.Stderr)
714 c.Check(exited, check.Equals, 0)
715 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
718 f, err := os.Open(npydir + "/onehot.npy")
719 c.Assert(err, check.IsNil)
721 npy, err := gonpy.NewReader(f)
722 c.Assert(err, check.IsNil)
723 c.Check(npy.Shape, check.DeepEquals, []int{2, 1})
724 onehot, err := npy.GetUint32()
725 if c.Check(err, check.IsNil) {
726 for r := 0; r < npy.Shape[0]; r++ {
727 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
729 c.Check(onehot, check.DeepEquals, []uint32{0, 3})
732 f, err = os.Open(npydir + "/onehot-columns.npy")
733 c.Assert(err, check.IsNil)
735 npy, err = gonpy.NewReader(f)
736 c.Assert(err, check.IsNil)
737 c.Check(npy.Shape, check.DeepEquals, []int{5, 4})
738 onehotcols, err := npy.GetInt32()
739 if c.Check(err, check.IsNil) {
740 for r := 0; r < npy.Shape[0]; r++ {
741 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
743 c.Check(onehotcols, check.DeepEquals, []int32{
747 1000000, 1000000, 1000000, 1000000,
754 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
756 cgnames: []string{"sample1", "sample2", "sample3", "sample4"},
757 chi2Cases: []bool{false, true, true, false},
759 includeVariant1: true,
762 cgs := map[string]CompactGenome{
763 "sample1": CompactGenome{
764 Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
766 "sample2": CompactGenome{
767 Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
769 "sample3": CompactGenome{
770 Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
772 "sample4": CompactGenome{
773 Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
776 maxv := tileVariantID(3)
777 remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
778 chunkstarttag := tagID(10)
779 fakevariant := TileVariant{Sequence: []byte("ACGT")}
780 seq := map[tagID][]TileVariant{}
781 for tag := tagID(10); tag < 12; tag++ {
782 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
783 c.Logf("=== tag %d", tag)
784 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
785 c.Logf("chunk len=%d", len(chunk))
786 for _, x := range chunk {
789 c.Logf("xref len=%d", len(xref))
790 for _, x := range xref {
793 out := onehotcols2int8(chunk)
794 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
795 for i := 0; i < len(out); i += len(chunk) {
796 c.Logf("%+v", out[i:i+len(chunk)])
798 coords := onehotChunk2Indirect(chunk)
799 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
800 for _, x := range coords {