1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "github.com/kshedden/gonpy"
// exportSuite is the stateless fixture type whose methods hold the
// export tests in this file (e.g. TestFastaToHGVS).
17 type exportSuite struct{}
// Hand an exportSuite instance to check.Suite during package
// initialization; the return value is discarded.
// NOTE(review): presumably this is gocheck's suite registration
// (gopkg.in/check.v1) — the import is not visible here, confirm.
19 var _ = check.Suite(&exportSuite{})
21 func (s *exportSuite) TestFastaToHGVS(c *check.C) {
24 err := ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
25 c.Check(err, check.IsNil)
27 var buffer bytes.Buffer
28 exited := (&importer{}).RunCommand("import", []string{"-local=true", "-tag-library", "testdata/tags", "-output-tiles", "-save-incomplete-tiles", "testdata/pipeline1", "testdata/ref.fasta"}, &bytes.Buffer{}, &buffer, os.Stderr)
29 c.Assert(exited, check.Equals, 0)
30 ioutil.WriteFile(tmpdir+"/library.gob", buffer.Bytes(), 0644)
32 exited = (&exporter{}).RunCommand("export", []string{
34 "-input-dir=" + tmpdir,
35 "-output-dir=" + tmpdir,
36 "-output-format=hgvs-onehot",
37 "-output-labels=" + tmpdir + "/labels.csv",
38 "-ref=testdata/ref.fasta",
39 }, &buffer, os.Stderr, os.Stderr)
40 c.Check(exited, check.Equals, 0)
41 output, err := ioutil.ReadFile(tmpdir + "/out.chr1.tsv")
42 if !c.Check(err, check.IsNil) {
43 out, _ := exec.Command("find", tmpdir, "-ls").CombinedOutput()
46 c.Check(sortLines(string(output)), check.Equals, sortLines(`chr1.1_3delinsGGC 1 0
47 chr1.41_42delinsAA 1 0
51 chr1.302_305delinsAAAA 1 0
53 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.tsv")
54 c.Check(err, check.IsNil)
55 c.Check(sortLines(string(output)), check.Equals, sortLines(`chr2.1_3delinsAAA 0 1
56 chr2.125_127delinsAAA 0 1
58 chr2.258_269delinsAA 1 0
61 chr2.471_472delinsAA 1 0
63 labels, err := ioutil.ReadFile(tmpdir + "/labels.csv")
64 c.Check(err, check.IsNil)
65 c.Check(string(labels), check.Equals, `0,"input1","out.tsv"
69 exited = (&exporter{}).RunCommand("export", []string{
71 "-input-dir=" + tmpdir,
72 "-output-dir=" + tmpdir,
73 "-output-format=pvcf",
74 "-ref=testdata/ref.fasta",
75 }, &buffer, os.Stderr, os.Stderr)
76 c.Check(exited, check.Equals, 0)
77 output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
78 c.Check(err, check.IsNil)
80 c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
81 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
82 chr1 1 . NNN GGC . . . GT 1/1 0/0
83 chr1 41 . TT AA . . . GT 1/0 0/0
84 chr1 161 . A T . . . GT 0/1 0/0
85 chr1 178 . A T . . . GT 0/1 0/0
86 chr1 221 . TCCA T . . . GT 1/1 0/0
87 chr1 302 . TTTT AAAA . . . GT 0/1 0/0
89 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
90 c.Check(err, check.IsNil)
92 c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
93 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
94 chr2 1 . TTT AAA . . . GT 0/0 0/1
95 chr2 125 . CTT AAA . . . GT 0/0 1/1
96 chr2 240 . ATTTTTCTTGCTCTC A . . . GT 1/0 0/0
97 chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
98 chr2 315 . C A . . . GT 1/0 0/0
99 chr2 469 . GTGG G . . . GT 1/0 0/0
100 chr2 471 . GG AA . . . GT 0/1 0/0
103 exited = (&exporter{}).RunCommand("export", []string{
105 "-input-dir=" + tmpdir,
106 "-output-dir=" + tmpdir,
107 "-output-format=vcf",
108 "-ref=testdata/ref.fasta",
109 }, &buffer, os.Stderr, os.Stderr)
110 c.Check(exited, check.Equals, 0)
111 output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
112 c.Check(err, check.IsNil)
113 c.Log(string(output))
114 c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
115 chr1 1 . NNN GGC . . AC=2
116 chr1 41 . TT AA . . AC=1
117 chr1 161 . A T . . AC=1
118 chr1 178 . A T . . AC=1
119 chr1 221 . TCCA T . . AC=2
120 chr1 302 . TTTT AAAA . . AC=1
122 output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
123 c.Check(err, check.IsNil)
124 c.Log(string(output))
125 c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
126 chr2 1 . TTT AAA . . AC=1
127 chr2 125 . CTT AAA . . AC=2
128 chr2 240 . ATTTTTCTTGCTCTC A . . AC=1
129 chr2 258 . CCTTGTATTTTT AA . . AC=1
130 chr2 315 . C A . . AC=1
131 chr2 469 . GTGG G . . AC=1
132 chr2 471 . GG AA . . AC=1
135 c.Logf("export hgvs-numpy")
137 exited = (&exporter{}).RunCommand("export", []string{
139 "-input-dir=" + tmpdir,
140 "-output-dir=" + outdir,
141 "-output-format=hgvs-numpy",
142 "-ref=testdata/ref.fasta",
143 }, &buffer, os.Stderr, os.Stderr)
144 c.Check(exited, check.Equals, 0)
146 f, err := os.Open(outdir + "/matrix.chr1.npy")
147 c.Assert(err, check.IsNil)
149 npy, err := gonpy.NewReader(f)
150 c.Assert(err, check.IsNil)
151 variants, err := npy.GetInt8()
152 c.Assert(err, check.IsNil)
153 c.Check(variants, check.HasLen, 6*2*2) // 6 variants * 2 alleles * 2 genomes
154 c.Check(variants, check.DeepEquals, []int8{
155 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, // input1.1.fasta
156 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
159 f, err = os.Open(outdir + "/matrix.chr2.npy")
160 c.Assert(err, check.IsNil)
162 npy, err = gonpy.NewReader(f)
163 c.Assert(err, check.IsNil)
164 variants, err = npy.GetInt8()
165 c.Assert(err, check.IsNil)
166 c.Check(variants, check.HasLen, 7*2*2) // 7 variants * 2 alleles * 2 genomes
167 c.Check(variants, check.DeepEquals, []int8{
168 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, // input1.1.fasta
169 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
172 annotations, err := ioutil.ReadFile(outdir + "/annotations.chr1.csv")
173 c.Check(err, check.IsNil)
174 c.Logf("%s", string(annotations))
175 c.Check(string(annotations), check.Equals, `0,"chr1.1_3delinsGGC"
176 1,"chr1.41_42delinsAA"
180 5,"chr1.302_305delinsAAAA"
182 annotations, err = ioutil.ReadFile(outdir + "/annotations.chr2.csv")
183 c.Check(err, check.IsNil)
184 c.Check(string(annotations), check.Equals, `0,"chr2.1_3delinsAAA"
185 1,"chr2.125_127delinsAAA"
187 3,"chr2.258_269delinsAA"
190 6,"chr2.471_472delinsAA"