c.Check(exited, check.Equals, 0)
c.Check("\n"+output.String(), check.Equals, `
chr2:g.1008C>G chr2 1008 C G false
-chr2:g.1028_1029delinsTT chr2 1028 AA TT false
+chr2:g.1028A>T chr2 1028 A T false
+chr2:g.1029A>T chr2 1029 A T false
chr2:g.1032_1033insA chr2 1033 A false
`)
}
c.Logf("%s", out)
}
c.Check(sortLines(string(output)), check.Equals, sortLines(`chr1.1_3delinsGGC 1 0
-chr1.41_42delinsAA 1 0
+chr1.41T>A 1 0
+chr1.42T>A 1 0
chr1.161A>T 1 0
chr1.178A>T 1 0
chr1.222_224del 1 0
chr2.258_269delinsAA 1 0
chr2.315C>A 1 0
chr2.470_472del 1 0
-chr2.471_472delinsAA 1 0
+chr2.471G>A 1 0
+chr2.472G>A 1 0
`))
labels, err := ioutil.ReadFile(tmpdir + "/labels.csv")
c.Check(err, check.IsNil)
c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
chr1 1 . NNN GGC . . . GT 1/1 0/0
-chr1 41 . TT AA . . . GT 1/0 0/0
+chr1 41 . T A . . . GT 1/0 0/0
+chr1 42 . T A . . . GT 1/0 0/0
chr1 161 . A T . . . GT 0/1 0/0
chr1 178 . A T . . . GT 0/1 0/0
chr1 221 . TCCA T . . . GT 1/1 0/0
chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
chr2 315 . C A . . . GT 1/0 0/0
chr2 469 . GTGG G . . . GT 1/0 0/0
-chr2 471 . GG AA . . . GT 0/1 0/0
+chr2 471 . G A . . . GT 0/1 0/0
+chr2 472 . G A . . . GT 0/1 0/0
`))
exited = (&exporter{}).RunCommand("export", []string{
c.Log(string(output))
c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 1 . NNN GGC . . AC=2
-chr1 41 . TT AA . . AC=1
+chr1 41 . T A . . AC=1
+chr1 42 . T A . . AC=1
chr1 161 . A T . . AC=1
chr1 178 . A T . . AC=1
chr1 221 . TCCA T . . AC=2
chr2 258 . CCTTGTATTTTT AA . . AC=1
chr2 315 . C A . . AC=1
chr2 469 . GTGG G . . AC=1
-chr2 471 . GG AA . . AC=1
+chr2 471 . G A . . AC=1
+chr2 472 . G A . . AC=1
`))
c.Logf("export hgvs-numpy")
c.Assert(err, check.IsNil)
variants, err := npy.GetInt8()
c.Assert(err, check.IsNil)
- c.Check(variants, check.HasLen, 6*2*2) // 6 variants * 2 alleles * 2 genomes
+ c.Check(variants, check.HasLen, 7*2*2) // 7 variants * 2 alleles * 2 genomes
c.Check(variants, check.DeepEquals, []int8{
- 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, // input1.1.fasta
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, // input2.1.fasta
+ 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, // input1.1.fasta
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, // input2.1.fasta
})
f, err = os.Open(outdir + "/matrix.chr2.npy")
c.Assert(err, check.IsNil)
variants, err = npy.GetInt8()
c.Assert(err, check.IsNil)
- c.Check(variants, check.HasLen, 7*2*2) // 6 variants * 2 alleles * 2 genomes
+ c.Check(variants, check.HasLen, 8*2*2) // 8 variants * 2 alleles * 2 genomes
c.Check(variants, check.DeepEquals, []int8{
- 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, // input1.1.fasta
- 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
+ 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, // input1.1.fasta
+ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
})
annotations, err := ioutil.ReadFile(outdir + "/annotations.chr1.csv")
c.Check(err, check.IsNil)
c.Logf("%s", string(annotations))
c.Check(string(annotations), check.Equals, `0,"chr1.1_3delinsGGC"
-1,"chr1.41_42delinsAA"
-2,"chr1.161A>T"
-3,"chr1.178A>T"
-4,"chr1.222_224del"
-5,"chr1.302_305delinsAAAA"
+1,"chr1.41T>A"
+2,"chr1.42T>A"
+3,"chr1.161A>T"
+4,"chr1.178A>T"
+5,"chr1.222_224del"
+6,"chr1.302_305delinsAAAA"
`)
annotations, err = ioutil.ReadFile(outdir + "/annotations.chr2.csv")
c.Check(err, check.IsNil)
3,"chr2.258_269delinsAA"
4,"chr2.315C>A"
5,"chr2.470_472del"
-6,"chr2.471_472delinsAA"
+6,"chr2.471G>A"
+7,"chr2.472G>A"
`)
c.Logf("export hgvs-numpy with p-value threshold")
v.New += diffs[i].Text
}
}
+ if len(v.Ref) == 2 && len(v.New) == 2 {
+ v1 := v
+ v1.Ref = v1.Ref[:1]
+ v1.New = v1.New[:1]
+ v.Ref = v.Ref[1:]
+ v.New = v.New[1:]
+ v.Position++
+ v.Left = v1.Ref
+ pos++
+ variants = append(variants, v1)
+ }
pos += len(v.Ref)
variants = append(variants, v)
left = ""
}
out = append(out, d)
}
+ // for i := 0; i < len(out)-1; i++ {
+ // if out[i].Type == diffmatchpatch.DiffDelete && len(out[i].Text) == 2 &&
+ // out[i+1].Type == diffmatchpatch.DiffInsert && len(out[i+1].Text) == 2 {
+ // out = append(out, diffmatchpatch.Diff{}, diffmatchpatch.Diff{})
+ // copy(out[i+4:], out[i+2:])
+ // out[i+2] = diffmatchpatch.Diff{diffmatchpatch.DiffDelete, out[i].Text[1:]}
+ // out[i+3] = diffmatchpatch.Diff{diffmatchpatch.DiffInsert, out[i+1].Text[1:]}
+ // out[i].Text = out[i].Text[:1]
+ // out[i+1].Text = out[i+1].Text[:1]
+ // }
+ // }
return
}
{
a: "aaGGttAAtttt",
b: "aaCCttttttC",
- expect: []string{"3_4delinsCC", "7_8del", "12_13insC"},
+ expect: []string{"3G>C", "4G>C", "7_8del", "12_13insC"},
},
{
// without cleanup, diffmatchpatch solves this as {"3del", "=A", "4_5insA"}
// without cleanup, diffmatchpatch solves this as {"3_4del", "=A", "5_6insAA"}
a: "agggaggggg",
b: "agAAaggggg",
- expect: []string{"3_4delinsAA"},
+ expect: []string{"3G>A", "4G>A"},
},
{
// without cleanup, diffmatchpatch solves this as {"3_4del", "=A", "5_6insCA"}
a: "agggaggggg",
b: "agACaggggg",
- expect: []string{"3_4delinsAC"},
+ expect: []string{"3G>A", "4G>C"},
},
{
// without cleanup, diffmatchpatch solves this as {"3_7del", "=A", "8_9insAAACA"}
{
a: "agggaggggg",
b: "agCAaggggg",
- expect: []string{"3_4delinsCA"},
+ expect: []string{"3G>C", "4G>A"},
},
{
a: "agggg",
b: "agAAg",
- expect: []string{"3_4delinsAA"},
+ expect: []string{"3G>A", "4G>A"},
+ },
+ {
+ a: "aggggg",
+ b: "agAAAg",
+ expect: []string{"3_5delinsAAA"},
},
{
a: "acgtgaa",
b: "tcacaagac",
expect: []string{"4T>C", "6del"},
},
+ {
+ a: "tcatcgagac",
+ b: "tcGCcgagac",
+ expect: []string{"3A>G", "4T>C"},
+ },
+ {
+ a: "tcatcgagac",
+ b: "tcGCcggac",
+ expect: []string{"3A>G", "4T>C", "7del"},
+ },
} {
c.Log(trial)
var vars []string
hgvsout, err := ioutil.ReadFile(tmpdir + "/out.tsv")
c.Check(err, check.IsNil)
c.Check(sortLines(string(hgvsout)), check.Equals, sortLines(`chr1:g.1_3delinsGGC N
-chr1:g.[41_42delinsAA];[41=] N
+chr1:g.[41T>A];[41=] N
+chr1:g.[42T>A];[42=] N
chr1:g.[161=];[161A>T] N
chr1:g.[178=];[178A>T] N
chr1:g.222_224del N
chr2:g.[258_269delinsAA];[258=] .
chr2:g.[315C>A];[315=] .
chr2:g.[470_472del];[470=] .
-chr2:g.[471=];[471_472delinsAA] .
+chr2:g.[471=];[471G>A] .
+chr2:g.[472=];[472G>A] .
`))
code = (&exporter{}).RunCommand("lightning export", []string{"-local", "-ref", "testdata/ref.fasta", "-output-dir", tmpdir, "-output-format", "pvcf", "-input-dir", tmpdir + "/merged", "-output-bed", tmpdir + "/export.bed", "-output-per-chromosome=false"}, bytes.NewReader(nil), os.Stderr, os.Stderr)
c.Check(sortLines(string(vcfout)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
chr1 1 . NNN GGC . . . GT 1/1 0/0
-chr1 41 . TT AA . . . GT 1/0 0/0
+chr1 41 . T A . . . GT 1/0 0/0
+chr1 42 . T A . . . GT 1/0 0/0
chr1 161 . A T . . . GT 0/1 0/0
chr1 178 . A T . . . GT 0/1 0/0
chr1 221 . TCCA T . . . GT 1/1 0/0
chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
chr2 315 . C A . . . GT 1/0 0/0
chr2 469 . GTGG G . . . GT 1/0 0/0
-chr2 471 . GG AA . . . GT 0/1 0/0
+chr2 471 . G A . . . GT 0/1 0/0
+chr2 472 . G A . . . GT 0/1 0/0
`))
bedout, err := ioutil.ReadFile(tmpdir + "/export.bed")
c.Check(err, check.IsNil)
0,0,8d4fe9a63921b,chr1:g.222_224del
0,0,ba4263ca4199c,chr1:g.1_3delinsGGC
0,0,ba4263ca4199c,chr1:g.222_224del
-0,0,ba4263ca4199c,chr1:g.41_42delinsAA
+0,0,ba4263ca4199c,chr1:g.41T>A
+0,0,ba4263ca4199c,chr1:g.42T>A
1,1,139890345dbb8,chr1:g.302_305delinsAAAA
4,4,cbfca15d241d3,chr2:g.125_127delinsAAA
4,4,cbfca15d241d3,chr2:g.1_3delinsAAA
4,4,f5fafe9450b02,chr2:g.241_245delinsAAAAA
4,4,f5fafe9450b02,chr2:g.291C>A
4,4,fe9a71a42adb4,chr2:g.125_127delinsAAA
-6,6,e36dce85efbef,chr2:g.471_472delinsAA
+6,6,e36dce85efbef,chr2:g.471G>A
+6,6,e36dce85efbef,chr2:g.472G>A
6,6,f81388b184f4a,chr2:g.470_472del
`))
}