1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type sliceSuite struct{}
18 var _ = check.Suite(&sliceSuite{})
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
22 err := os.Mkdir(tmpdir+"/lib1", 0777)
23 c.Assert(err, check.IsNil)
24 err = os.Mkdir(tmpdir+"/lib2", 0777)
25 c.Assert(err, check.IsNil)
26 err = os.Mkdir(tmpdir+"/lib3", 0777)
27 c.Assert(err, check.IsNil)
28 cwd, err := os.Getwd()
29 c.Assert(err, check.IsNil)
30 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31 c.Assert(err, check.IsNil)
32 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33 c.Assert(err, check.IsNil)
35 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36 c.Check(err, check.IsNil)
38 c.Log("=== import testdata/ref ===")
39 exited := (&importer{}).RunCommand("import", []string{
41 "-tag-library", "testdata/tags",
43 "-save-incomplete-tiles",
44 "-o", tmpdir + "/lib1/library1.gob",
46 }, nil, os.Stderr, os.Stderr)
47 c.Assert(exited, check.Equals, 0)
49 c.Log("=== import testdata/pipeline1 ===")
50 exited = (&importer{}).RunCommand("import", []string{
52 "-tag-library", "testdata/tags",
54 "-o", tmpdir + "/lib2/library2.gob",
55 tmpdir + "/pipeline1",
56 }, nil, os.Stderr, os.Stderr)
57 c.Assert(exited, check.Equals, 0)
59 c.Log("=== import pipeline1dup ===")
60 exited = (&importer{}).RunCommand("import", []string{
62 "-tag-library", "testdata/tags",
64 "-o", tmpdir + "/lib3/library3.gob",
65 tmpdir + "/pipeline1dup",
66 }, nil, os.Stderr, os.Stderr)
67 c.Assert(exited, check.Equals, 0)
71 c.Log("=== slice ===")
72 exited = (&slicecmd{}).RunCommand("slice", []string{
74 "-output-dir=" + slicedir,
79 }, nil, os.Stderr, os.Stderr)
80 c.Check(exited, check.Equals, 0)
81 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
87 exited = (&dump{}).RunCommand("dump", []string{
90 "-input-dir=" + slicedir,
91 "-output-dir=" + dumpdir,
92 }, nil, os.Stderr, os.Stderr)
93 c.Check(exited, check.Equals, 0)
94 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
96 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97 c.Assert(err, check.IsNil)
99 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
102 c.Log("=== slice-numpy ===")
105 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
107 "-input-dir=" + slicedir,
108 "-output-dir=" + npydir,
109 }, nil, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
114 f, err := os.Open(npydir + "/matrix.0000.npy")
115 c.Assert(err, check.IsNil)
117 npy, err := gonpy.NewReader(f)
118 c.Assert(err, check.IsNil)
119 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120 variants, err := npy.GetInt16()
121 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
123 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124 c.Assert(err, check.IsNil)
125 c.Logf("%s", annotations)
126 for _, s := range []string{
129 "chr1:g.1_3delinsGGC",
132 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
135 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136 c.Assert(err, check.IsNil)
137 c.Logf("%s", annotations)
138 for _, s := range []string{
139 ",2,chr2:g.1_3delinsAAA",
140 ",2,chr2:g.125_127delinsAAA",
141 ",4,chr2:g.125_127delinsAAA",
143 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
147 c.Log("=== slice-numpy + regions ===")
150 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
152 "-regions=" + tmpdir + "/chr1-12-100.bed",
153 "-input-dir=" + slicedir,
154 "-output-dir=" + npydir,
155 "-chunked-hgvs-matrix=true",
156 }, nil, os.Stderr, os.Stderr)
157 c.Check(exited, check.Equals, 0)
158 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
161 f, err := os.Open(npydir + "/matrix.0000.npy")
162 c.Assert(err, check.IsNil)
164 npy, err := gonpy.NewReader(f)
165 c.Assert(err, check.IsNil)
166 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167 variants, err := npy.GetInt16()
168 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
170 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171 c.Assert(err, check.IsNil)
172 c.Logf("%s", annotations)
173 for _, s := range []string{
176 "chr1:g.1_3delinsGGC",
179 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
182 for _, fnm := range []string{
183 npydir + "/matrix.0001.annotations.csv",
184 npydir + "/matrix.0002.annotations.csv",
186 annotations, err := ioutil.ReadFile(fnm)
187 c.Assert(err, check.IsNil)
188 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
192 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193 c.Check(err, check.IsNil)
195 c.Log("=== slice-numpy + regions + merge ===")
198 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
200 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201 "-input-dir=" + slicedir,
202 "-output-dir=" + npydir,
203 "-merge-output=true",
204 "-single-hgvs-matrix=true",
205 }, nil, os.Stderr, os.Stderr)
206 c.Check(exited, check.Equals, 0)
207 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
210 f, err := os.Open(npydir + "/matrix.npy")
211 c.Assert(err, check.IsNil)
213 npy, err := gonpy.NewReader(f)
214 c.Assert(err, check.IsNil)
215 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216 variants, err := npy.GetInt16()
217 if c.Check(err, check.IsNil) {
218 c.Check(variants, check.DeepEquals, []int16{
226 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227 c.Assert(err, check.IsNil)
228 c.Logf("%s", annotations)
229 for _, s := range []string{
230 "0,0,1,chr1:g.161A>T",
231 "0,0,1,chr1:g.178A>T",
232 "4,1,2,chr2:g.125_127delinsAAA",
234 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
238 c.Log("=== slice-numpy + chunked hgvs matrix ===")
240 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
243 pipeline1dup/input1 1
244 pipeline1dup/input2 0
246 c.Assert(err, check.IsNil)
248 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
250 "-chunked-hgvs-matrix=true",
251 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252 "-chi2-case-control-column=CC",
254 "-min-coverage=0.75",
255 "-input-dir=" + slicedir,
256 "-output-dir=" + npydir,
257 }, nil, os.Stderr, os.Stderr)
258 c.Check(exited, check.Equals, 0)
259 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
262 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263 c.Assert(err, check.IsNil)
264 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
270 c.Log("=== slice-numpy + onehotChunked ===")
272 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
275 pipeline1dup/input1 1
276 pipeline1dup/input2 0
278 c.Assert(err, check.IsNil)
280 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
282 "-chunked-onehot=true",
283 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
284 "-chi2-case-control-column=CC",
286 "-min-coverage=0.75",
287 "-input-dir=" + slicedir,
288 "-output-dir=" + npydir,
289 }, nil, os.Stderr, os.Stderr)
290 c.Check(exited, check.Equals, 0)
291 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
294 f, err := os.Open(npydir + "/onehot.0002.npy")
295 c.Assert(err, check.IsNil)
297 npy, err := gonpy.NewReader(f)
298 c.Assert(err, check.IsNil)
299 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
300 onehot, err := npy.GetInt8()
301 if c.Check(err, check.IsNil) {
302 for r := 0; r < npy.Shape[0]; r++ {
303 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
305 c.Check(onehot, check.DeepEquals, []int8{
308 0, 1, 0, // dup/input1
309 1, 0, 1, // dup/input2
314 c.Log("=== slice-numpy + onehotSingle ===")
316 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
319 pipeline1dup/input1 1
320 pipeline1dup/input2 0
322 c.Assert(err, check.IsNil)
324 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
326 "-single-onehot=true",
327 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
328 "-chi2-case-control-column=CC",
330 "-min-coverage=0.75",
331 "-input-dir=" + slicedir,
332 "-output-dir=" + npydir,
334 }, nil, os.Stderr, os.Stderr)
335 c.Check(exited, check.Equals, 0)
336 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
339 f, err := os.Open(npydir + "/onehot.npy")
340 c.Assert(err, check.IsNil)
342 npy, err := gonpy.NewReader(f)
343 c.Assert(err, check.IsNil)
344 c.Check(npy.Shape, check.DeepEquals, []int{2, 16})
345 onehot, err := npy.GetUint32()
346 if c.Check(err, check.IsNil) {
347 for r := 0; r < npy.Shape[0]; r++ {
348 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
350 c.Check(onehot, check.DeepEquals, []uint32{
351 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
352 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
356 f, err = os.Open(npydir + "/onehot-columns.npy")
357 c.Assert(err, check.IsNil)
359 npy, err = gonpy.NewReader(f)
360 c.Assert(err, check.IsNil)
361 c.Check(npy.Shape, check.DeepEquals, []int{8, 5})
362 onehotcols, err := npy.GetInt32()
363 if c.Check(err, check.IsNil) {
364 for r := 0; r < npy.Shape[0]; r++ {
365 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
367 c.Check(onehotcols, check.DeepEquals, []int32{
368 0, 0, 1, 4, 4, 4, 6, 6,
369 2, 3, 2, 2, 3, 4, 2, 3,
370 0, 1, 0, 0, 0, 0, 0, 0,
371 157299, 157299, 157299, 157299, 157299, 157299, 157299, 157299,
372 803273, 803273, 803273, 803273, 803273, 803273, 803273, 803273,
378 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
380 cgnames: []string{"sample1", "sample2", "sample3", "sample4"},
381 chi2Cases: []bool{false, true, true, false},
383 includeVariant1: true,
386 cgs := map[string]CompactGenome{
387 "sample1": CompactGenome{
388 Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
390 "sample2": CompactGenome{
391 Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
393 "sample3": CompactGenome{
394 Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
396 "sample4": CompactGenome{
397 Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
400 maxv := tileVariantID(3)
401 remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
402 chunkstarttag := tagID(10)
403 fakevariant := TileVariant{Sequence: []byte("ACGT")}
404 seq := map[tagID][]TileVariant{}
405 for tag := tagID(10); tag < 12; tag++ {
406 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
407 c.Logf("=== tag %d", tag)
408 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
409 c.Logf("chunk len=%d", len(chunk))
410 for _, x := range chunk {
413 c.Logf("xref len=%d", len(xref))
414 for _, x := range xref {
417 out := onehotcols2int8(chunk)
418 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
419 for i := 0; i < len(out); i += len(chunk) {
420 c.Logf("%+v", out[i:i+len(chunk)])
422 coords := onehotChunk2Indirect(chunk)
423 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
424 for _, x := range coords {