Update memory-size log message.
[lightning.git] / slice_test.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "io/ioutil"
9         "os"
10         "os/exec"
11
12         "github.com/kshedden/gonpy"
13         "gopkg.in/check.v1"
14 )
15
16 type sliceSuite struct{}
17
18 var _ = check.Suite(&sliceSuite{})
19
20 func (s *sliceSuite) TestImportAndSlice(c *check.C) {
21         tmpdir := c.MkDir()
22         err := os.Mkdir(tmpdir+"/lib1", 0777)
23         c.Assert(err, check.IsNil)
24         err = os.Mkdir(tmpdir+"/lib2", 0777)
25         c.Assert(err, check.IsNil)
26         err = os.Mkdir(tmpdir+"/lib3", 0777)
27         c.Assert(err, check.IsNil)
28         cwd, err := os.Getwd()
29         c.Assert(err, check.IsNil)
30         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
31         c.Assert(err, check.IsNil)
32         err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
33         c.Assert(err, check.IsNil)
34
35         err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
36         c.Check(err, check.IsNil)
37
38         c.Log("=== import testdata/ref ===")
39         exited := (&importer{}).RunCommand("import", []string{
40                 "-local=true",
41                 "-tag-library", "testdata/tags",
42                 "-output-tiles",
43                 "-save-incomplete-tiles",
44                 "-o", tmpdir + "/lib1/library1.gob",
45                 "testdata/ref.fasta",
46         }, nil, os.Stderr, os.Stderr)
47         c.Assert(exited, check.Equals, 0)
48
49         c.Log("=== import testdata/pipeline1 ===")
50         exited = (&importer{}).RunCommand("import", []string{
51                 "-local=true",
52                 "-tag-library", "testdata/tags",
53                 "-output-tiles",
54                 "-o", tmpdir + "/lib2/library2.gob",
55                 tmpdir + "/pipeline1",
56         }, nil, os.Stderr, os.Stderr)
57         c.Assert(exited, check.Equals, 0)
58
59         c.Log("=== import pipeline1dup ===")
60         exited = (&importer{}).RunCommand("import", []string{
61                 "-local=true",
62                 "-tag-library", "testdata/tags",
63                 "-output-tiles",
64                 "-o", tmpdir + "/lib3/library3.gob",
65                 tmpdir + "/pipeline1dup",
66         }, nil, os.Stderr, os.Stderr)
67         c.Assert(exited, check.Equals, 0)
68
69         slicedir := c.MkDir()
70
71         c.Log("=== slice ===")
72         exited = (&slicecmd{}).RunCommand("slice", []string{
73                 "-local=true",
74                 "-output-dir=" + slicedir,
75                 "-tags-per-file=2",
76                 tmpdir + "/lib1",
77                 tmpdir + "/lib2",
78                 tmpdir + "/lib3",
79         }, nil, os.Stderr, os.Stderr)
80         c.Check(exited, check.Equals, 0)
81         out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
82         c.Logf("%s", out)
83
84         c.Log("=== slice-numpy ===")
85         {
86                 npydir := c.MkDir()
87                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
88                         "-local=true",
89                         "-input-dir=" + slicedir,
90                         "-output-dir=" + npydir,
91                 }, nil, os.Stderr, os.Stderr)
92                 c.Check(exited, check.Equals, 0)
93                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
94                 c.Logf("%s", out)
95
96                 f, err := os.Open(npydir + "/matrix.0000.npy")
97                 c.Assert(err, check.IsNil)
98                 defer f.Close()
99                 npy, err := gonpy.NewReader(f)
100                 c.Assert(err, check.IsNil)
101                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
102                 variants, err := npy.GetInt16()
103                 c.Check(variants, check.DeepEquals, []int16{2, 1, 1, 2, -1, -1, 1, 1, 2, 1, 1, 2, -1, -1, 1, 1})
104
105                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
106                 c.Assert(err, check.IsNil)
107                 c.Logf("%s", annotations)
108                 for _, s := range []string{
109                         "chr1:g.161A>T",
110                         "chr1:g.178A>T",
111                         "chr1:g.1_3delinsGGC",
112                         "chr1:g.222_224del",
113                 } {
114                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
115                 }
116
117                 annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
118                 c.Assert(err, check.IsNil)
119                 c.Logf("%s", annotations)
120                 for _, s := range []string{
121                         ",2,chr2:g.1_3delinsAAA",
122                         ",2,chr2:g.125_127delinsAAA",
123                         ",4,chr2:g.125_127delinsAAA",
124                 } {
125                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
126                 }
127         }
128
129         c.Log("=== slice-numpy + regions ===")
130         {
131                 npydir := c.MkDir()
132                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
133                         "-local=true",
134                         "-regions=" + tmpdir + "/chr1-12-100.bed",
135                         "-input-dir=" + slicedir,
136                         "-output-dir=" + npydir,
137                         "-chunked-hgvs-matrix=true",
138                 }, nil, os.Stderr, os.Stderr)
139                 c.Check(exited, check.Equals, 0)
140                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
141                 c.Logf("%s", out)
142
143                 f, err := os.Open(npydir + "/matrix.0000.npy")
144                 c.Assert(err, check.IsNil)
145                 defer f.Close()
146                 npy, err := gonpy.NewReader(f)
147                 c.Assert(err, check.IsNil)
148                 c.Check(npy.Shape, check.DeepEquals, []int{4, 2})
149                 variants, err := npy.GetInt16()
150                 c.Check(variants, check.DeepEquals, []int16{2, 1, -1, -1, 2, 1, -1, -1})
151
152                 annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
153                 c.Assert(err, check.IsNil)
154                 c.Logf("%s", annotations)
155                 for _, s := range []string{
156                         "chr1:g.161A>T",
157                         "chr1:g.178A>T",
158                         "chr1:g.1_3delinsGGC",
159                         "chr1:g.222_224del",
160                 } {
161                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
162                 }
163
164                 for _, fnm := range []string{
165                         npydir + "/matrix.0001.annotations.csv",
166                         npydir + "/matrix.0002.annotations.csv",
167                 } {
168                         annotations, err := ioutil.ReadFile(fnm)
169                         c.Assert(err, check.IsNil)
170                         c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
171                 }
172         }
173
174         err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
175         c.Check(err, check.IsNil)
176
177         c.Log("=== slice-numpy + regions + merge ===")
178         {
179                 npydir := c.MkDir()
180                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
181                         "-local=true",
182                         "-regions=" + tmpdir + "/chr1and2-100-200.bed",
183                         "-input-dir=" + slicedir,
184                         "-output-dir=" + npydir,
185                         "-merge-output=true",
186                         "-single-hgvs-matrix=true",
187                 }, nil, os.Stderr, os.Stderr)
188                 c.Check(exited, check.Equals, 0)
189                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
190                 c.Logf("%s", out)
191
192                 f, err := os.Open(npydir + "/matrix.npy")
193                 c.Assert(err, check.IsNil)
194                 defer f.Close()
195                 npy, err := gonpy.NewReader(f)
196                 c.Assert(err, check.IsNil)
197                 c.Check(npy.Shape, check.DeepEquals, []int{4, 4})
198                 variants, err := npy.GetInt16()
199                 if c.Check(err, check.IsNil) {
200                         c.Check(variants, check.DeepEquals, []int16{2, 1, 3, 1, -1, -1, 4, 2, 2, 1, 3, 1, -1, -1, 4, 2})
201                 }
202
203                 annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
204                 c.Assert(err, check.IsNil)
205                 c.Logf("%s", annotations)
206                 for _, s := range []string{
207                         "0,0,1,chr1:g.161A>T",
208                         "0,0,1,chr1:g.178A>T",
209                         "4,1,2,chr2:g.125_127delinsAAA",
210                 } {
211                         c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
212                 }
213         }
214
215         c.Log("=== slice-numpy + chunked hgvs matrix ===")
216         {
217                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
218 pipeline1/input1        1
219 pipeline1/input2        0
220 pipeline1dup/input1     1
221 pipeline1dup/input2     0
222 `), 0600)
223                 c.Assert(err, check.IsNil)
224                 npydir := c.MkDir()
225                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
226                         "-local=true",
227                         "-chunked-hgvs-matrix=true",
228                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
229                         "-chi2-case-control-column=CC",
230                         "-chi2-p-value=0.5",
231                         "-min-coverage=0.75",
232                         "-input-dir=" + slicedir,
233                         "-output-dir=" + npydir,
234                 }, nil, os.Stderr, os.Stderr)
235                 c.Check(exited, check.Equals, 0)
236                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
237                 c.Logf("%s", out)
238
239                 annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
240                 c.Assert(err, check.IsNil)
241                 c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
242 1,chr2:g.471G>A
243 2,chr2:g.472G>A
244 `)
245         }
246
247         c.Log("=== slice-numpy + onehotChunked ===")
248         {
249                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
250 pipeline1/input1        1
251 pipeline1/input2        0
252 pipeline1dup/input1     1
253 pipeline1dup/input2     0
254 `), 0600)
255                 c.Assert(err, check.IsNil)
256                 npydir := c.MkDir()
257                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
258                         "-local=true",
259                         "-chunked-onehot=true",
260                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
261                         "-chi2-case-control-column=CC",
262                         "-chi2-p-value=0.5",
263                         "-min-coverage=0.75",
264                         "-input-dir=" + slicedir,
265                         "-output-dir=" + npydir,
266                 }, nil, os.Stderr, os.Stderr)
267                 c.Check(exited, check.Equals, 0)
268                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
269                 c.Logf("%s", out)
270
271                 f, err := os.Open(npydir + "/onehot.0002.npy")
272                 c.Assert(err, check.IsNil)
273                 defer f.Close()
274                 npy, err := gonpy.NewReader(f)
275                 c.Assert(err, check.IsNil)
276                 c.Check(npy.Shape, check.DeepEquals, []int{4, 6})
277                 onehot, err := npy.GetInt8()
278                 if c.Check(err, check.IsNil) {
279                         for r := 0; r < npy.Shape[0]; r++ {
280                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
281                         }
282                         c.Check(onehot, check.DeepEquals, []int8{
283                                 0, 0, 0, 1, 0, 0, // input1
284                                 0, 1, 0, 0, 0, 1, // input2
285                                 0, 0, 0, 1, 0, 0, // dup/input1
286                                 0, 1, 0, 0, 0, 1, // dup/input2
287                         })
288                 }
289         }
290
291         c.Log("=== slice-numpy + onehotSingle ===")
292         {
293                 err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
294 pipeline1/input1        1
295 pipeline1/input2        0
296 pipeline1dup/input1     1
297 pipeline1dup/input2     0
298 `), 0600)
299                 c.Assert(err, check.IsNil)
300                 npydir := c.MkDir()
301                 exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
302                         "-local=true",
303                         "-single-onehot=true",
304                         "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
305                         "-chi2-case-control-column=CC",
306                         "-chi2-p-value=0.5",
307                         "-min-coverage=0.75",
308                         "-input-dir=" + slicedir,
309                         "-output-dir=" + npydir,
310                 }, nil, os.Stderr, os.Stderr)
311                 c.Check(exited, check.Equals, 0)
312                 out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
313                 c.Logf("%s", out)
314
315                 f, err := os.Open(npydir + "/onehot.npy")
316                 c.Assert(err, check.IsNil)
317                 defer f.Close()
318                 npy, err := gonpy.NewReader(f)
319                 c.Assert(err, check.IsNil)
320                 c.Check(npy.Shape, check.DeepEquals, []int{2, 16})
321                 onehot, err := npy.GetUint32()
322                 if c.Check(err, check.IsNil) {
323                         for r := 0; r < npy.Shape[0]; r++ {
324                                 c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
325                         }
326                         c.Check(onehot, check.DeepEquals, []uint32{
327                                 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 0, 2,
328                                 1, 1, 2, 2, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
329                         })
330                 }
331         }
332 }