1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type sliceSuite struct{}
18 var _ = check.Suite(&sliceSuite{})
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
22 err := os.Mkdir(tmpdir+"/lib1", 0777)
23 c.Assert(err, check.IsNil)
24 err = os.Mkdir(tmpdir+"/lib2", 0777)
25 c.Assert(err, check.IsNil)
26 err = os.Mkdir(tmpdir+"/lib3", 0777)
27 c.Assert(err, check.IsNil)
28 cwd, err := os.Getwd()
29 c.Assert(err, check.IsNil)
30 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31 c.Assert(err, check.IsNil)
32 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33 c.Assert(err, check.IsNil)
35 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36 c.Check(err, check.IsNil)
38 c.Log("=== import testdata/ref ===")
39 exited := (&importer{}).RunCommand("import", []string{
41 "-tag-library", "testdata/tags",
43 "-save-incomplete-tiles",
44 "-o", tmpdir + "/lib1/library1.gob",
46 }, nil, os.Stderr, os.Stderr)
47 c.Assert(exited, check.Equals, 0)
49 c.Log("=== import testdata/pipeline1 ===")
50 exited = (&importer{}).RunCommand("import", []string{
52 "-tag-library", "testdata/tags",
54 "-o", tmpdir + "/lib2/library2.gob",
55 tmpdir + "/pipeline1",
56 }, nil, os.Stderr, os.Stderr)
57 c.Assert(exited, check.Equals, 0)
59 c.Log("=== import pipeline1dup ===")
60 exited = (&importer{}).RunCommand("import", []string{
62 "-tag-library", "testdata/tags",
64 "-o", tmpdir + "/lib3/library3.gob",
65 tmpdir + "/pipeline1dup",
66 }, nil, os.Stderr, os.Stderr)
67 c.Assert(exited, check.Equals, 0)
71 c.Log("=== slice ===")
72 exited = (&slicecmd{}).RunCommand("slice", []string{
74 "-output-dir=" + slicedir,
79 }, nil, os.Stderr, os.Stderr)
80 c.Check(exited, check.Equals, 0)
81 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
87 exited = (&dump{}).RunCommand("dump", []string{
90 "-input-dir=" + slicedir,
91 "-output-dir=" + dumpdir,
92 }, nil, os.Stderr, os.Stderr)
93 c.Check(exited, check.Equals, 0)
94 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
96 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97 c.Assert(err, check.IsNil)
99 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
102 c.Log("=== slice-numpy ===")
105 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
107 "-input-dir=" + slicedir,
108 "-output-dir=" + npydir,
109 }, nil, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
114 f, err := os.Open(npydir + "/matrix.0000.npy")
115 c.Assert(err, check.IsNil)
117 npy, err := gonpy.NewReader(f)
118 c.Assert(err, check.IsNil)
119 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120 variants, err := npy.GetInt16()
121 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
123 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124 c.Assert(err, check.IsNil)
125 c.Logf("%s", annotations)
126 for _, s := range []string{
129 "chr1:g.1_3delinsGGC",
132 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
135 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136 c.Assert(err, check.IsNil)
137 c.Logf("%s", annotations)
138 for _, s := range []string{
139 ",2,chr2:g.1_3delinsAAA",
140 ",2,chr2:g.125_127delinsAAA",
141 ",4,chr2:g.125_127delinsAAA",
143 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
147 c.Log("=== slice-numpy + regions ===")
150 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
152 "-regions=" + tmpdir + "/chr1-12-100.bed",
153 "-input-dir=" + slicedir,
154 "-output-dir=" + npydir,
155 "-chunked-hgvs-matrix=true",
156 }, nil, os.Stderr, os.Stderr)
157 c.Check(exited, check.Equals, 0)
158 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
161 f, err := os.Open(npydir + "/matrix.0000.npy")
162 c.Assert(err, check.IsNil)
164 npy, err := gonpy.NewReader(f)
165 c.Assert(err, check.IsNil)
166 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167 variants, err := npy.GetInt16()
168 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
170 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171 c.Assert(err, check.IsNil)
172 c.Logf("%s", annotations)
173 for _, s := range []string{
176 "chr1:g.1_3delinsGGC",
179 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
182 for _, fnm := range []string{
183 npydir + "/matrix.0001.annotations.csv",
184 npydir + "/matrix.0002.annotations.csv",
186 annotations, err := ioutil.ReadFile(fnm)
187 c.Assert(err, check.IsNil)
188 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
192 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193 c.Check(err, check.IsNil)
195 c.Log("=== slice-numpy + regions + merge ===")
198 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
200 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201 "-input-dir=" + slicedir,
202 "-output-dir=" + npydir,
203 "-merge-output=true",
204 "-single-hgvs-matrix=true",
205 }, nil, os.Stderr, os.Stderr)
206 c.Check(exited, check.Equals, 0)
207 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
210 f, err := os.Open(npydir + "/matrix.npy")
211 c.Assert(err, check.IsNil)
213 npy, err := gonpy.NewReader(f)
214 c.Assert(err, check.IsNil)
215 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216 variants, err := npy.GetInt16()
217 if c.Check(err, check.IsNil) {
218 c.Check(variants, check.DeepEquals, []int16{
226 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227 c.Assert(err, check.IsNil)
228 c.Logf("%s", annotations)
229 for _, s := range []string{
230 "0,0,1,chr1:g.161A>T",
231 "0,0,1,chr1:g.178A>T",
232 "4,1,2,chr2:g.125_127delinsAAA",
234 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
238 c.Log("=== slice-numpy + chunked hgvs matrix ===")
240 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
243 pipeline1dup/input1 1
244 pipeline1dup/input2 0
246 c.Assert(err, check.IsNil)
248 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
250 "-chunked-hgvs-matrix=true",
251 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252 "-chi2-case-control-column=CC",
254 "-min-coverage=0.75",
255 "-input-dir=" + slicedir,
256 "-output-dir=" + npydir,
257 }, nil, os.Stderr, os.Stderr)
258 c.Check(exited, check.Equals, 0)
259 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
262 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263 c.Assert(err, check.IsNil)
264 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
270 c.Log("=== slice-numpy + onehotChunked ===")
272 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
275 pipeline1dup/input1 1
276 pipeline1dup/input2 0
278 c.Assert(err, check.IsNil)
280 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
282 "-chunked-onehot=true",
283 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
284 "-chi2-case-control-column=CC",
286 "-min-coverage=0.75",
287 "-input-dir=" + slicedir,
288 "-output-dir=" + npydir,
289 }, nil, os.Stderr, os.Stderr)
290 c.Check(exited, check.Equals, 0)
291 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
294 f, err := os.Open(npydir + "/onehot.0002.npy")
295 c.Assert(err, check.IsNil)
297 npy, err := gonpy.NewReader(f)
298 c.Assert(err, check.IsNil)
299 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
300 onehot, err := npy.GetInt8()
301 if c.Check(err, check.IsNil) {
302 for r := 0; r < npy.Shape[0]; r++ {
303 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
305 c.Check(onehot, check.DeepEquals, []int8{
308 0, 1, 0, // dup/input1
309 1, 0, 1, // dup/input2
314 c.Log("=== slice-numpy + onehotSingle ===")
316 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
319 pipeline1dup/input1 1
320 pipeline1dup/input2 0
322 c.Assert(err, check.IsNil)
324 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
326 "-single-onehot=true",
327 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
328 "-chi2-case-control-column=CC",
330 "-min-coverage=0.75",
331 "-input-dir=" + slicedir,
332 "-output-dir=" + npydir,
334 }, nil, os.Stderr, os.Stderr)
335 c.Check(exited, check.Equals, 0)
336 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
339 f, err := os.Open(npydir + "/onehot.npy")
340 c.Assert(err, check.IsNil)
342 npy, err := gonpy.NewReader(f)
343 c.Assert(err, check.IsNil)
344 c.Check(npy.Shape, check.DeepEquals, []int{2, 16})
345 onehot, err := npy.GetUint32()
346 if c.Check(err, check.IsNil) {
347 for r := 0; r < npy.Shape[0]; r++ {
348 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
350 c.Check(onehot, check.DeepEquals, []uint32{
351 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
352 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
356 f, err = os.Open(npydir + "/onehot-columns.npy")
357 c.Assert(err, check.IsNil)
359 npy, err = gonpy.NewReader(f)
360 c.Assert(err, check.IsNil)
361 c.Check(npy.Shape, check.DeepEquals, []int{8, 5})
362 onehotcols, err := npy.GetInt32()
363 if c.Check(err, check.IsNil) {
364 for r := 0; r < npy.Shape[0]; r++ {
365 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
367 c.Check(onehotcols, check.DeepEquals, []int32{
368 0, 0, 1, 4, 4, 4, 6, 6,
369 2, 3, 2, 2, 3, 4, 2, 3,
370 0, 1, 0, 0, 0, 0, 0, 0,
371 157299, 157299, 157299, 157299, 157299, 157299, 157299, 157299,
372 803273, 803273, 803273, 803273, 803273, 803273, 803273, 803273,
378 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
380 cgnames: []string{"sample1", "sample2", "sample3", "sample4"},
381 chi2Cases: []bool{false, true, true, false},
383 includeVariant1: true,
385 cgs := map[string]CompactGenome{
386 "sample1": CompactGenome{
387 Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
389 "sample2": CompactGenome{
390 Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
392 "sample3": CompactGenome{
393 Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
395 "sample4": CompactGenome{
396 Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
399 maxv := tileVariantID(3)
400 remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
401 chunkstarttag := tagID(10)
402 for tag := tagID(10); tag < 12; tag++ {
403 c.Logf("=== tag %d", tag)
404 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag)
405 c.Logf("chunk len=%d", len(chunk))
406 for _, x := range chunk {
409 c.Logf("xref len=%d", len(xref))
410 for _, x := range xref {
413 out := onehotcols2int8(chunk)
414 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
415 for i := 0; i < len(out); i += len(chunk) {
416 c.Logf("%+v", out[i:i+len(chunk)])
418 coords := onehotChunk2Indirect(chunk)
419 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
420 for _, x := range coords {