X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/d81fc575f556e71b5c552d03c626b43c0744b45f..5d939b4ccdab73d0729db56219ba674fb9995c26:/slice_test.go diff --git a/slice_test.go b/slice_test.go index 7332100634..47c43e1cee 100644 --- a/slice_test.go +++ b/slice_test.go @@ -81,6 +81,24 @@ func (s *sliceSuite) TestImportAndSlice(c *check.C) { out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput() c.Logf("%s", out) + c.Log("=== dump ===") + { + dumpdir := c.MkDir() + exited = (&dump{}).RunCommand("dump", []string{ + "-local=true", + "-tags=4,6,7", + "-input-dir=" + slicedir, + "-output-dir=" + dumpdir, + }, nil, os.Stderr, os.Stderr) + c.Check(exited, check.Equals, 0) + out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput() + c.Logf("%s", out) + dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv") + c.Assert(err, check.IsNil) + c.Logf("%s", dumped) + c.Check(string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`) + } + c.Log("=== slice-numpy ===") { npydir := c.MkDir() @@ -134,6 +152,7 @@ func (s *sliceSuite) TestImportAndSlice(c *check.C) { "-regions=" + tmpdir + "/chr1-12-100.bed", "-input-dir=" + slicedir, "-output-dir=" + npydir, + "-chunked-hgvs-matrix=true", }, nil, os.Stderr, os.Stderr) c.Check(exited, check.Equals, 0) out, _ := exec.Command("find", npydir, "-ls").CombinedOutput() @@ -160,9 +179,172 @@ func (s *sliceSuite) TestImportAndSlice(c *check.C) { c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*") } - annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv") + for _, fnm := range []string{ + npydir + "/matrix.0001.annotations.csv", + npydir + "/matrix.0002.annotations.csv", + } { + annotations, err := ioutil.ReadFile(fnm) + c.Assert(err, check.IsNil) + c.Check(string(annotations), check.Equals, "", check.Commentf(fnm)) + } + } + + err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644) + c.Check(err, check.IsNil) + + c.Log("=== slice-numpy + regions + merge ===") + { + npydir := c.MkDir() + exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{ + "-local=true", + "-regions=" + tmpdir + "/chr1and2-100-200.bed", + "-input-dir=" + slicedir, + "-output-dir=" + npydir, + "-merge-output=true", + "-single-hgvs-matrix=true", + }, nil, os.Stderr, os.Stderr) + c.Check(exited, check.Equals, 0) + out, _ := exec.Command("find", npydir, "-ls").CombinedOutput() + c.Logf("%s", out) + + f, err := os.Open(npydir + "/matrix.npy") + c.Assert(err, check.IsNil) + defer f.Close() + npy, err := gonpy.NewReader(f) + c.Assert(err, check.IsNil) + c.Check(npy.Shape, check.DeepEquals, []int{4, 4}) + variants, err := npy.GetInt16() + if c.Check(err, check.IsNil) { + c.Check(variants, check.DeepEquals, []int16{2, 1, 3, 1, -1, -1, 4, 2, 2, 1, 3, 1, -1, -1, 4, 2}) + } + + annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv") c.Assert(err, check.IsNil) c.Logf("%s", annotations) - c.Check(string(annotations), check.Equals, "") + for _, s := range []string{ + "0,0,1,chr1:g.161A>T", + "0,0,1,chr1:g.178A>T", + "4,1,2,chr2:g.125_127delinsAAA", + } { + c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*") + } + } + + c.Log("=== slice-numpy + chunked hgvs matrix ===") + { + err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC +pipeline1/input1 1 +pipeline1/input2 0 +pipeline1dup/input1 1 +pipeline1dup/input2 0 +`), 0600) + c.Assert(err, check.IsNil) + npydir := c.MkDir() + exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{ + "-local=true", + "-chunked-hgvs-matrix=true", + "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv", + "-chi2-case-control-column=CC", + "-chi2-p-value=0.5", + "-min-coverage=0.75", + "-input-dir=" + slicedir, + "-output-dir=" + npydir, + }, nil, os.Stderr, os.Stderr) + c.Check(exited, check.Equals, 0) + out, _ := exec.Command("find", npydir, "-ls").CombinedOutput() + c.Logf("%s", out) + + annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv") + c.Assert(err, check.IsNil) + c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del +1,chr2:g.471G>A +2,chr2:g.472G>A +`) + } + + c.Log("=== slice-numpy + onehotChunked ===") + { + err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC +pipeline1/input1 1 +pipeline1/input2 0 +pipeline1dup/input1 1 +pipeline1dup/input2 0 +`), 0600) + c.Assert(err, check.IsNil) + npydir := c.MkDir() + exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{ + "-local=true", + "-chunked-onehot=true", + "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv", + "-chi2-case-control-column=CC", + "-chi2-p-value=0.5", + "-min-coverage=0.75", + "-input-dir=" + slicedir, + "-output-dir=" + npydir, + }, nil, os.Stderr, os.Stderr) + c.Check(exited, check.Equals, 0) + out, _ := exec.Command("find", npydir, "-ls").CombinedOutput() + c.Logf("%s", out) + + f, err := os.Open(npydir + "/onehot.0002.npy") + c.Assert(err, check.IsNil) + defer f.Close() + npy, err := gonpy.NewReader(f) + c.Assert(err, check.IsNil) + c.Check(npy.Shape, check.DeepEquals, []int{4, 6}) + onehot, err := npy.GetInt8() + if c.Check(err, check.IsNil) { + for r := 0; r < npy.Shape[0]; r++ { + c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]]) + } + c.Check(onehot, check.DeepEquals, []int8{ + 0, 0, 0, 1, 0, 0, // input1 + 0, 1, 0, 0, 0, 1, // input2 + 0, 0, 0, 1, 0, 0, // dup/input1 + 0, 1, 0, 0, 0, 1, // dup/input2 + }) + } + } + + c.Log("=== slice-numpy + onehotSingle ===") + { + err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC +pipeline1/input1 1 +pipeline1/input2 0 +pipeline1dup/input1 1 +pipeline1dup/input2 0 +`), 0600) + c.Assert(err, check.IsNil) + npydir := c.MkDir() + exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{ + "-local=true", + "-single-onehot=true", + "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv", + "-chi2-case-control-column=CC", + "-chi2-p-value=0.5", + "-min-coverage=0.75", + "-input-dir=" + slicedir, + "-output-dir=" + npydir, + }, nil, os.Stderr, os.Stderr) + c.Check(exited, check.Equals, 0) + out, _ := exec.Command("find", npydir, "-ls").CombinedOutput() + c.Logf("%s", out) + + f, err := os.Open(npydir + "/onehot.npy") + c.Assert(err, check.IsNil) + defer f.Close() + npy, err := gonpy.NewReader(f) + c.Assert(err, check.IsNil) + c.Check(npy.Shape, check.DeepEquals, []int{2, 16}) + onehot, err := npy.GetUint32() + if c.Check(err, check.IsNil) { + for r := 0; r < npy.Shape[0]; r++ { + c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]]) + } + c.Check(onehot, check.DeepEquals, []uint32{ + 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2, + 1, 1, 2, 2, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, + }) + } } }