1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "github.com/kshedden/gonpy"
16 type exportSuite struct{}
18 var _ = check.Suite(&exportSuite{})
20 func (s *exportSuite) TestFastaToHGVS(c *check.C) {
23 err := ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
24 c.Check(err, check.IsNil)
26 exited := (&importer{}).RunCommand("import", []string{
28 "-tag-library", "testdata/tags",
30 "-save-incomplete-tiles",
31 "-o", tmpdir + "/library1.gob",
33 }, nil, os.Stderr, os.Stderr)
34 c.Assert(exited, check.Equals, 0)
36 exited = (&importer{}).RunCommand("import", []string{
38 "-tag-library", "testdata/tags",
40 // "-save-incomplete-tiles",
41 "-o", tmpdir + "/library2.gob",
43 }, nil, os.Stderr, os.Stderr)
44 c.Assert(exited, check.Equals, 0)
46 exited = (&merger{}).RunCommand("merge", []string{
48 "-o", tmpdir + "/library.gob",
49 tmpdir + "/library1.gob",
50 tmpdir + "/library2.gob",
51 }, nil, os.Stderr, os.Stderr)
52 c.Assert(exited, check.Equals, 0)
54 input := tmpdir + "/library.gob"
56 exited = (&exporter{}).RunCommand("export", []string{
58 "-input-dir=" + input,
59 "-output-dir=" + tmpdir,
60 "-output-format=hgvs-onehot",
61 "-output-labels=" + tmpdir + "/labels.csv",
62 "-ref=testdata/ref.fasta",
63 }, nil, os.Stderr, os.Stderr)
64 c.Check(exited, check.Equals, 0)
65 output, err := ioutil.ReadFile(tmpdir + "/out.chr1.tsv")
66 if !c.Check(err, check.IsNil) {
67 out, _ := exec.Command("find", tmpdir, "-ls").CombinedOutput()
70 c.Check(sortLines(string(output)), check.Equals, sortLines(`chr1.1_3delinsGGC 1 0
76 chr1.302_305delinsAAAA 1 0
78 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.tsv")
79 c.Check(err, check.IsNil)
80 c.Check(sortLines(string(output)), check.Equals, sortLines(`chr2.1_3delinsAAA 0 1
81 chr2.125_127delinsAAA 0 1
83 chr2.258_269delinsAA 1 0
89 labels, err := ioutil.ReadFile(tmpdir + "/labels.csv")
90 c.Check(err, check.IsNil)
91 c.Check(string(labels), check.Equals, `0,"input1","out.tsv"
95 exited = (&exporter{}).RunCommand("export", []string{
97 "-input-dir=" + input,
98 "-output-dir=" + tmpdir,
99 "-output-format=pvcf",
100 "-ref=testdata/ref.fasta",
101 }, os.Stderr, os.Stderr, os.Stderr)
102 c.Check(exited, check.Equals, 0)
103 output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
104 c.Check(err, check.IsNil)
105 c.Log(string(output))
106 c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
107 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
108 chr1 1 . NNN GGC . . . GT 1/1 0/0
109 chr1 41 . T A . . . GT 1/0 0/0
110 chr1 42 . T A . . . GT 1/0 0/0
111 chr1 161 . A T . . . GT 0/1 0/0
112 chr1 178 . A T . . . GT 0/1 0/0
113 chr1 221 . TCCA T . . . GT 1/1 0/0
114 chr1 302 . TTTT AAAA . . . GT 0/1 0/0
116 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
117 c.Check(err, check.IsNil)
118 c.Log(string(output))
119 c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
120 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
121 chr2 1 . TTT AAA . . . GT 0/0 0/1
122 chr2 125 . CTT AAA . . . GT 0/0 1/1
123 chr2 240 . ATTTTTCTTGCTCTC A . . . GT 1/0 0/0
124 chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
125 chr2 315 . C A . . . GT 1/0 0/0
126 chr2 469 . GTGG G . . . GT 1/0 0/0
127 chr2 471 . G A . . . GT 0/1 0/0
128 chr2 472 . G A . . . GT 0/1 0/0
131 exited = (&exporter{}).RunCommand("export", []string{
133 "-input-dir=" + input,
134 "-output-dir=" + tmpdir,
135 "-output-format=vcf",
136 "-ref=testdata/ref.fasta",
137 }, nil, os.Stderr, os.Stderr)
138 c.Check(exited, check.Equals, 0)
139 output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
140 c.Check(err, check.IsNil)
141 c.Log(string(output))
142 c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
143 chr1 1 . NNN GGC . . AC=2
144 chr1 41 . T A . . AC=1
145 chr1 42 . T A . . AC=1
146 chr1 161 . A T . . AC=1
147 chr1 178 . A T . . AC=1
148 chr1 221 . TCCA T . . AC=2
149 chr1 302 . TTTT AAAA . . AC=1
151 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
152 c.Check(err, check.IsNil)
153 c.Log(string(output))
154 c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
155 chr2 1 . TTT AAA . . AC=1
156 chr2 125 . CTT AAA . . AC=2
157 chr2 240 . ATTTTTCTTGCTCTC A . . AC=1
158 chr2 258 . CCTTGTATTTTT AA . . AC=1
159 chr2 315 . C A . . AC=1
160 chr2 469 . GTGG G . . AC=1
161 chr2 471 . G A . . AC=1
162 chr2 472 . G A . . AC=1
165 c.Logf("export hgvs-numpy")
167 exited = (&exporter{}).RunCommand("export", []string{
169 "-input-dir=" + input,
170 "-output-dir=" + outdir,
171 "-output-format=hgvs-numpy",
172 "-ref=testdata/ref.fasta",
173 "-match-genome=input[12]",
174 }, nil, os.Stderr, os.Stderr)
175 c.Check(exited, check.Equals, 0)
177 f, err := os.Open(outdir + "/matrix.chr1.npy")
178 c.Assert(err, check.IsNil)
180 npy, err := gonpy.NewReader(f)
181 c.Assert(err, check.IsNil)
182 variants, err := npy.GetInt8()
183 c.Assert(err, check.IsNil)
184 c.Check(variants, check.HasLen, 7*2*2) // 7 variants * 2 alleles * 2 genomes
185 c.Check(variants, check.DeepEquals, []int8{
186 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, // input1.1.fasta
187 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, // input2.1.fasta
190 f, err = os.Open(outdir + "/matrix.chr2.npy")
191 c.Assert(err, check.IsNil)
193 npy, err = gonpy.NewReader(f)
194 c.Assert(err, check.IsNil)
195 variants, err = npy.GetInt8()
196 c.Assert(err, check.IsNil)
197 c.Check(variants, check.HasLen, 8*2*2) // 8 variants * 2 alleles * 2 genomes
198 c.Check(variants, check.DeepEquals, []int8{
199 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, // input1.1.fasta
200 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
203 annotations, err := ioutil.ReadFile(outdir + "/annotations.chr1.csv")
204 c.Check(err, check.IsNil)
205 c.Logf("%s", string(annotations))
206 c.Check(string(annotations), check.Equals, `0,"chr1.1_3delinsGGC"
212 6,"chr1.302_305delinsAAAA"
214 annotations, err = ioutil.ReadFile(outdir + "/annotations.chr2.csv")
215 c.Check(err, check.IsNil)
216 c.Check(string(annotations), check.Equals, `0,"chr2.1_3delinsAAA"
217 1,"chr2.125_127delinsAAA"
219 3,"chr2.258_269delinsAA"
226 c.Logf("export hgvs-numpy with p-value threshold")
228 err = ioutil.WriteFile(tmpdir+"/cases", []byte("input1\n"), 0777)
229 c.Assert(err, check.IsNil)
230 exited = (&exporter{}).RunCommand("export", []string{
232 "-input-dir=" + input,
234 "-cases=" + tmpdir + "/cases",
235 "-output-dir=" + outdir,
236 "-output-format=hgvs-numpy",
237 "-ref=testdata/ref.fasta",
238 "-match-genome=input[12]",
239 }, nil, os.Stderr, os.Stderr)
240 c.Check(exited, check.Equals, 0)