1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type sliceSuite struct{}
18 var _ = check.Suite(&sliceSuite{})
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
22 err := os.Mkdir(tmpdir+"/lib1", 0777)
23 c.Assert(err, check.IsNil)
24 err = os.Mkdir(tmpdir+"/lib2", 0777)
25 c.Assert(err, check.IsNil)
26 err = os.Mkdir(tmpdir+"/lib3", 0777)
27 c.Assert(err, check.IsNil)
28 cwd, err := os.Getwd()
29 c.Assert(err, check.IsNil)
30 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31 c.Assert(err, check.IsNil)
32 err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33 c.Assert(err, check.IsNil)
35 err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36 c.Check(err, check.IsNil)
38 c.Log("=== import testdata/ref ===")
39 exited := (&importer{}).RunCommand("import", []string{
41 "-tag-library", "testdata/tags",
43 "-save-incomplete-tiles",
44 "-o", tmpdir + "/lib1/library1.gob",
46 }, nil, os.Stderr, os.Stderr)
47 c.Assert(exited, check.Equals, 0)
49 c.Log("=== import testdata/pipeline1 ===")
50 exited = (&importer{}).RunCommand("import", []string{
52 "-tag-library", "testdata/tags",
54 "-o", tmpdir + "/lib2/library2.gob",
55 tmpdir + "/pipeline1",
56 }, nil, os.Stderr, os.Stderr)
57 c.Assert(exited, check.Equals, 0)
59 c.Log("=== import pipeline1dup ===")
60 exited = (&importer{}).RunCommand("import", []string{
62 "-tag-library", "testdata/tags",
64 "-o", tmpdir + "/lib3/library3.gob",
65 tmpdir + "/pipeline1dup",
66 }, nil, os.Stderr, os.Stderr)
67 c.Assert(exited, check.Equals, 0)
71 c.Log("=== slice ===")
72 exited = (&slicecmd{}).RunCommand("slice", []string{
74 "-output-dir=" + slicedir,
79 }, nil, os.Stderr, os.Stderr)
80 c.Check(exited, check.Equals, 0)
81 out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
87 exited = (&dump{}).RunCommand("dump", []string{
90 "-input-dir=" + slicedir,
91 "-output-dir=" + dumpdir,
92 }, nil, os.Stderr, os.Stderr)
93 c.Check(exited, check.Equals, 0)
94 out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
96 dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
97 c.Assert(err, check.IsNil)
99 c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTG.*`)
102 c.Log("=== slice-numpy ===")
105 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
107 "-input-dir=" + slicedir,
108 "-output-dir=" + npydir,
109 }, nil, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
114 f, err := os.Open(npydir + "/matrix.0000.npy")
115 c.Assert(err, check.IsNil)
117 npy, err := gonpy.NewReader(f)
118 c.Assert(err, check.IsNil)
119 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
120 variants, err := npy.GetInt16()
121 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
123 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
124 c.Assert(err, check.IsNil)
125 c.Logf("%s", annotations)
126 for _, s := range []string{
129 "chr1:g.1_3delinsGGC",
132 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
135 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
136 c.Assert(err, check.IsNil)
137 c.Logf("%s", annotations)
138 for _, s := range []string{
139 ",2,chr2:g.1_3delinsAAA",
140 ",2,chr2:g.125_127delinsAAA",
141 ",4,chr2:g.125_127delinsAAA",
143 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
147 c.Log("=== slice-numpy + regions ===")
150 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
152 "-regions=" + tmpdir + "/chr1-12-100.bed",
153 "-input-dir=" + slicedir,
154 "-output-dir=" + npydir,
155 "-chunked-hgvs-matrix=true",
156 }, nil, os.Stderr, os.Stderr)
157 c.Check(exited, check.Equals, 0)
158 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
161 f, err := os.Open(npydir + "/matrix.0000.npy")
162 c.Assert(err, check.IsNil)
164 npy, err := gonpy.NewReader(f)
165 c.Assert(err, check.IsNil)
166 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
167 variants, err := npy.GetInt16()
168 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
170 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
171 c.Assert(err, check.IsNil)
172 c.Logf("%s", annotations)
173 for _, s := range []string{
176 "chr1:g.1_3delinsGGC",
179 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
182 for _, fnm := range []string{
183 npydir + "/matrix.0001.annotations.csv",
184 npydir + "/matrix.0002.annotations.csv",
186 annotations, err := ioutil.ReadFile(fnm)
187 c.Assert(err, check.IsNil)
188 c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
192 err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
193 c.Check(err, check.IsNil)
195 c.Log("=== slice-numpy + regions + merge ===")
198 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
200 "-regions=" + tmpdir + "/chr1and2-100-200.bed",
201 "-input-dir=" + slicedir,
202 "-output-dir=" + npydir,
203 "-merge-output=true",
204 "-single-hgvs-matrix=true",
205 }, nil, os.Stderr, os.Stderr)
206 c.Check(exited, check.Equals, 0)
207 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
210 f, err := os.Open(npydir + "/matrix.npy")
211 c.Assert(err, check.IsNil)
213 npy, err := gonpy.NewReader(f)
214 c.Assert(err, check.IsNil)
215 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
216 variants, err := npy.GetInt16()
217 if c.Check(err, check.IsNil) {
218 c.Check(variants, check.DeepEquals, []int16{
226 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
227 c.Assert(err, check.IsNil)
228 c.Logf("%s", annotations)
229 for _, s := range []string{
230 "0,0,1,chr1:g.161A>T",
231 "0,0,1,chr1:g.178A>T",
232 "4,1,2,chr2:g.125_127delinsAAA",
234 c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
238 c.Log("=== slice-numpy + chunked hgvs matrix ===")
240 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
243 pipeline1dup/input1 1
244 pipeline1dup/input2 0
246 c.Assert(err, check.IsNil)
248 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
250 "-chunked-hgvs-matrix=true",
251 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
252 "-chi2-case-control-column=CC",
254 "-min-coverage=0.75",
255 "-input-dir=" + slicedir,
256 "-output-dir=" + npydir,
257 }, nil, os.Stderr, os.Stderr)
258 c.Check(exited, check.Equals, 0)
259 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
262 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
263 c.Assert(err, check.IsNil)
264 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
270 c.Log("=== slice-numpy + onehotChunked ===")
272 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
275 pipeline1dup/input1 1
276 pipeline1dup/input2 0
278 c.Assert(err, check.IsNil)
280 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
282 "-chunked-onehot=true",
283 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
284 "-chi2-case-control-column=CC",
286 "-min-coverage=0.75",
287 "-input-dir=" + slicedir,
288 "-output-dir=" + npydir,
289 }, nil, os.Stderr, os.Stderr)
290 c.Check(exited, check.Equals, 0)
291 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
294 f, err := os.Open(npydir + "/onehot.0002.npy")
295 c.Assert(err, check.IsNil)
297 npy, err := gonpy.NewReader(f)
298 c.Assert(err, check.IsNil)
299 c.Check(npy.Shape, check.DeepEquals, []int{4, 3})
300 onehot, err := npy.GetInt8()
301 if c.Check(err, check.IsNil) {
302 for r := 0; r < npy.Shape[0]; r++ {
303 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
305 c.Check(onehot, check.DeepEquals, []int8{
308 0, 1, 0, // dup/input1
309 1, 0, 1, // dup/input2
314 c.Log("=== slice-numpy + onehotSingle ===")
316 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID CC
319 pipeline1dup/input1 1
320 pipeline1dup/input2 0
322 c.Assert(err, check.IsNil)
324 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
326 "-single-onehot=true",
327 "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
328 "-chi2-case-control-column=CC",
330 "-min-coverage=0.75",
331 "-input-dir=" + slicedir,
332 "-output-dir=" + npydir,
334 }, nil, os.Stderr, os.Stderr)
335 c.Check(exited, check.Equals, 0)
336 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
339 f, err := os.Open(npydir + "/onehot.npy")
340 c.Assert(err, check.IsNil)
342 npy, err := gonpy.NewReader(f)
343 c.Assert(err, check.IsNil)
344 c.Check(npy.Shape, check.DeepEquals, []int{2, 16})
345 onehot, err := npy.GetUint32()
346 if c.Check(err, check.IsNil) {
347 for r := 0; r < npy.Shape[0]; r++ {
348 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
350 c.Check(onehot, check.DeepEquals, []uint32{
351 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
352 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
358 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
360 cgnames: []string{"sample1", "sample2", "sample3", "sample4"},
361 chi2Cases: []bool{false, true, true, false},
363 includeVariant1: true,
365 cgs := map[string]CompactGenome{
366 "sample1": CompactGenome{
367 Variants: []tileVariantID{0, 0, 1, 1}, // hom tv=1
369 "sample2": CompactGenome{
370 Variants: []tileVariantID{0, 0, 5, 5}, // hom tv=2
372 "sample3": CompactGenome{
373 Variants: []tileVariantID{0, 0, 5, 1}, // het tv=1, tv=2
375 "sample4": CompactGenome{
376 Variants: []tileVariantID{0, 0, 9, 9}, // hom tv=3
379 maxv := tileVariantID(3)
380 remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
381 chunkstarttag := tagID(10)
382 for tag := tagID(10); tag < 12; tag++ {
383 c.Logf("=== tag %d", tag)
384 chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag)
385 c.Logf("chunk len=%d", len(chunk))
386 for _, x := range chunk {
389 c.Logf("xref len=%d", len(xref))
390 for _, x := range xref {
393 out := onehotcols2int8(chunk)
394 c.Logf("onehotcols2int8(chunk) len=%d", len(out))
395 for i := 0; i < len(out); i += len(chunk) {
396 c.Logf("%+v", out[i:i+len(chunk)])
398 coords := onehotChunk2Indirect(chunk)
399 c.Logf("onehotChunk2Indirect(chunk) len=%d", len(coords))
400 for _, x := range coords {