1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type sliceSuite struct{}
18 var _ = check.Suite(&sliceSuite{})
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
22 err := os.Mkdir(tmpdir+"/lib1", 0777)
23 c.Assert(err, check.IsNil)
24 err = os.Mkdir(tmpdir+"/lib2", 0777)
25 c.Assert(err, check.IsNil)
26 err = os.Mkdir(tmpdir+"/lib3", 0777)
27 c.Assert(err, check.IsNil)
28 cwd, err := os.Getwd()
29 c.Assert(err, check.IsNil)
30 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31 c.Assert(err, check.IsNil)
32 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33 c.Assert(err, check.IsNil)
35 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36 c.Check(err, check.IsNil)
38 c.Log("=== import testdata/ref ===")
39 exited := (&importer{}).RunCommand("import", []string{
41 "-tag-library", "testdata/tags",
43 "-save-incomplete-tiles",
44 "-o", tmpdir + "/lib1/library1.gob",
46 }, nil, os.Stderr, os.Stderr)
47 c.Assert(exited, check.Equals, 0)
49 c.Log("=== import testdata/pipeline1 ===")
50 exited = (&importer{}).RunCommand("import", []string{
52 "-tag-library", "testdata/tags",
54 "-o", tmpdir + "/lib2/library2.gob",
55 tmpdir + "/pipeline1",
56 }, nil, os.Stderr, os.Stderr)
57 c.Assert(exited, check.Equals, 0)
59 c.Log("=== import pipeline1dup ===")
60 exited = (&importer{}).RunCommand("import", []string{
62 "-tag-library", "testdata/tags",
64 "-o", tmpdir + "/lib3/library3.gob",
65 tmpdir + "/pipeline1dup",
66 }, nil, os.Stderr, os.Stderr)
67 c.Assert(exited, check.Equals, 0)
71 c.Log("=== slice ===")
72 exited = (&slicecmd{}).RunCommand("slice", []string{
74 "-output-dir=" + slicedir,
79 }, nil, os.Stderr, os.Stderr)
80 c.Check(exited, check.Equals, 0)
81 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
87 exited = (&dump{}).RunCommand("dump", []string{
90 "-input-dir=" + slicedir,
91 "-output-dir=" + dumpdir,
92 }, nil, os.Stderr, os.Stderr)
93 c.Check(exited, check.Equals, 0)
94 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
96 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97 c.Assert(err, check.IsNil)
99 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
102 c.Log("=== slice-numpy ===")
105 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
107 "-input-dir=" + slicedir,
108 "-output-dir=" + npydir,
109 }, nil, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
114 f, err := os.Open(npydir + "/matrix.0000.npy")
115 c.Assert(err, check.IsNil)
117 npy, err := gonpy.NewReader(f)
118 c.Assert(err, check.IsNil)
119 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120 variants, err := npy.GetInt16()
121 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
123 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124 c.Assert(err, check.IsNil)
125 c.Logf("%s", annotations)
126 for _, s := range []string{
129 "chr1:g.1_3delinsGGC",
132 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
135 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136 c.Assert(err, check.IsNil)
137 c.Logf("%s", annotations)
138 for _, s := range []string{
139 ",2,chr2:g.1_3delinsAAA",
140 ",2,chr2:g.125_127delinsAAA",
141 ",4,chr2:g.125_127delinsAAA",
143 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
147 c.Log("=== slice-numpy + regions ===")
150 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
152 "-regions=" + tmpdir + "/chr1-12-100.bed",
153 "-input-dir=" + slicedir,
154 "-output-dir=" + npydir,
155 "-chunked-hgvs-matrix=true",
156 }, nil, os.Stderr, os.Stderr)
157 c.Check(exited, check.Equals, 0)
158 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
161 f, err := os.Open(npydir + "/matrix.0000.npy")
162 c.Assert(err, check.IsNil)
164 npy, err := gonpy.NewReader(f)
165 c.Assert(err, check.IsNil)
166 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167 variants, err := npy.GetInt16()
168 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
170 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171 c.Assert(err, check.IsNil)
172 c.Logf("%s", annotations)
173 for _, s := range []string{
176 "chr1:g.1_3delinsGGC",
179 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
182 for _, fnm := range []string{
183 npydir + "/matrix.0001.annotations.csv",
184 npydir + "/matrix.0002.annotations.csv",
186 annotations, err := ioutil.ReadFile(fnm)
187 c.Assert(err, check.IsNil)
188 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
192 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193 c.Check(err, check.IsNil)
195 c.Log("=== slice-numpy + regions + merge ===")
198 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
200 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201 "-input-dir=" + slicedir,
202 "-output-dir=" + npydir,
203 "-merge-output=true",
204 "-single-hgvs-matrix=true",
205 }, nil, os.Stderr, os.Stderr)
206 c.Check(exited, check.Equals, 0)
207 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
210 f, err := os.Open(npydir + "/matrix.npy")
211 c.Assert(err, check.IsNil)
213 npy, err := gonpy.NewReader(f)
214 c.Assert(err, check.IsNil)
215 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216 variants, err := npy.GetInt16()
217 if c.Check(err, check.IsNil) {
218 c.Check(variants, check.DeepEquals, []int16{
226 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227 c.Assert(err, check.IsNil)
228 c.Logf("%s", annotations)
229 for _, s := range []string{
230 "0,0,1,chr1:g.161A>T",
231 "0,0,1,chr1:g.178A>T",
232 "4,1,2,chr2:g.125_127delinsAAA",
234 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
238 c.Log("=== slice-numpy + chunked hgvs matrix ===")
240 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
243 pipeline1dup/input1 1
244 pipeline1dup/input2 0
246 c.Assert(err, check.IsNil)
248 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
250 "-chunked-hgvs-matrix=true",
251 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252 "-chi2-case-control-column=CC",
254 "-min-coverage=0.75",
255 "-input-dir=" + slicedir,
256 "-output-dir=" + npydir,
257 }, nil, os.Stderr, os.Stderr)
258 c.Check(exited, check.Equals, 0)
259 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
262 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263 c.Assert(err, check.IsNil)
264 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
270 c.Log("=== slice-numpy + onehotChunked ===")
272 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
275 pipeline1dup/input1 1
276 pipeline1dup/input2 0
278 c.Assert(err, check.IsNil)
280 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
282 "-chunked-onehot=true",
283 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
284 "-chi2-case-control-column=CC",
286 "-min-coverage=0.75",
287 "-input-dir=" + slicedir,
288 "-output-dir=" + npydir,
289 }, nil, os.Stderr, os.Stderr)
290 c.Check(exited, check.Equals, 0)
291 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
294 f, err := os.Open(npydir + "/onehot.0002.npy")
295 c.Assert(err, check.IsNil)
297 npy, err := gonpy.NewReader(f)
298 c.Assert(err, check.IsNil)
299 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
300 onehot, err := npy.GetInt8()
301 if c.Check(err, check.IsNil) {
302 for r := 0; r < npy.Shape[0]; r++ {
303 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
305 c.Check(onehot, check.DeepEquals, []int8{
308 0, 1, 0, // dup/input1
309 1, 0, 1, // dup/input2
314 c.Log("=== slice-numpy + onehotSingle ===")
316 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
319 pipeline1dup/input1 1
320 pipeline1dup/input2 0
322 c.Assert(err, check.IsNil)
324 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
326 "-single-onehot=true",
327 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
328 "-chi2-case-control-column=CC",
330 "-min-coverage=0.75",
331 "-input-dir=" + slicedir,
332 "-output-dir=" + npydir,
334 }, nil, os.Stderr, os.Stderr)
335 c.Check(exited, check.Equals, 0)
336 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
339 f, err := os.Open(npydir + "/onehot.npy")
340 c.Assert(err, check.IsNil)
342 npy, err := gonpy.NewReader(f)
343 c.Assert(err, check.IsNil)
344 c.Check(npy.Shape, check.DeepEquals, []int{2, 12})
345 onehot, err := npy.GetUint32()
346 if c.Check(err, check.IsNil) {
347 for r := 0; r < npy.Shape[0]; r++ {
348 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
350 c.Check(onehot, check.DeepEquals, []uint32{
351 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
352 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
356 f, err = os.Open(npydir + "/onehot-columns.npy")
357 c.Assert(err, check.IsNil)
359 npy, err = gonpy.NewReader(f)
360 c.Assert(err, check.IsNil)
361 c.Check(npy.Shape, check.DeepEquals, []int{5, 6})
362 onehotcols, err := npy.GetInt32()
363 if c.Check(err, check.IsNil) {
364 for r := 0; r < npy.Shape[0]; r++ {
365 c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
367 c.Check(onehotcols, check.DeepEquals, []int32{
371 157299, 157299, 157299, 157299, 157299, 157299,
372 803273, 803273, 803273, 803273, 803273, 803273,
378 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
380 cgnames: []string{"sample1", "sample2", "sample3", "sample4"},
381 chi2Cases: []bool{false, true, true, false},
383 includeVariant1: true,
386 cgs := map[string]CompactGenome{
387 "sample1": CompactGenome{
388 Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
390 "sample2": CompactGenome{
391 Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
393 "sample3": CompactGenome{
394 Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
396 "sample4": CompactGenome{
397 Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
400 maxv := tileVariantID(3)
401 remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
402 chunkstarttag := tagID(10)
403 fakevariant := TileVariant{Sequence: []byte("ACGT")}
404 seq := map[tagID][]TileVariant{}
405 for tag := tagID(10); tag < 12; tag++ {
406 seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
407 c.Logf("=== tag %d", tag)
408 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
409 c.Logf("chunk len=%d", len(chunk))
410 for _, x := range chunk {
413 c.Logf("xref len=%d", len(xref))
414 for _, x := range xref {
417 out := onehotcols2int8(chunk)
418 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
419 for i := 0; i < len(out); i += len(chunk) {
420 c.Logf("%+v", out[i:i+len(chunk)])
422 coords := onehotChunk2Indirect(chunk)
423 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
424 for _, x := range coords {