From 74ae10fa596b295fdd0904cf0568a3b3d7c5e0a9 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 30 Nov 2021 14:59:45 -0500 Subject: [PATCH] Call SNPs separately when called within 1bp of start/end of indels. fixes #18496 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- hgvs/diff.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ hgvs/diff_test.go | 15 +++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/hgvs/diff.go b/hgvs/diff.go index 835fcc2b97..57025e3588 100644 --- a/hgvs/diff.go +++ b/hgvs/diff.go @@ -143,6 +143,50 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) { in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text { in[i+2], in[i+1] = in[i+1], in[i+2] } + // when diffmatchpatch says [delAAA, insXAY] and + // len(X)==1, we prefer to treat the A>X as a snp. + if i < len(in)-1 && + d.Type == diffmatchpatch.DiffDelete && + in[i+1].Type == diffmatchpatch.DiffInsert && + len(d.Text) > 2 && + len(in[i+1].Text) > 2 && + d.Text[1] == in[i+1].Text[1] { + eqend := 2 + for ; eqend < len(d.Text) && eqend < len(in[i+1].Text) && d.Text[eqend] == in[i+1].Text[eqend]; eqend++ { + } + out = append(out, + diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[:1]}, + diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[:1]}, + diffmatchpatch.Diff{diffmatchpatch.DiffEqual, d.Text[1:eqend]}) + in[i].Text, in[i+1].Text = in[i].Text[eqend:], in[i+1].Text[eqend:] + i-- + continue + } + // when diffmatchpatch says [delAAA, insXAY] and + // len(Y)==1, we prefer to treat the A>Y as a snp. + if i < len(in)-1 && + d.Type == diffmatchpatch.DiffDelete && + in[i+1].Type == diffmatchpatch.DiffInsert && + len(d.Text) > 2 && + len(in[i+1].Text) > 2 && + d.Text[len(d.Text)-2] == in[i+1].Text[len(in[i+1].Text)-2] { + // eqstart will be the number of equal chars + // before the terminal snp, plus 1 for the snp + // itself. For example, given [delAAAA, insTTAAG], + // eqstart will be 3. 
+ eqstart := 2 + for ; eqstart < len(d.Text) && eqstart < len(in[i+1].Text) && d.Text[len(d.Text)-eqstart] == in[i+1].Text[len(in[i+1].Text)-eqstart]; eqstart++ { + } + eqstart-- + out = append(out, + diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[:len(d.Text)-eqstart]}, + diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[:len(in[i+1].Text)-eqstart]}, + diffmatchpatch.Diff{diffmatchpatch.DiffEqual, d.Text[len(d.Text)-eqstart : len(d.Text)-1]}, + diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[len(d.Text)-1:]}, + diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[len(in[i+1].Text)-1:]}) + i++ + continue + } out = append(out, d) } return diff --git a/hgvs/diff_test.go b/hgvs/diff_test.go index cc211b5f2d..1032f4e3d2 100644 --- a/hgvs/diff_test.go +++ b/hgvs/diff_test.go @@ -113,6 +113,21 @@ func (s *diffSuite) TestDiff(c *check.C) { b: "tcaAaagac", expect: []string{"4G>A"}, }, + { + a: "tcagatggac", + b: "tcaAaCggac", + expect: []string{"4G>A", "6T>C"}, + }, + { + a: "tcagatggac", + b: "tcaAaCggTc", + expect: []string{"4G>A", "6T>C", "9A>T"}, + }, + { + a: "tcagatggac", + b: "tcaAaCCggTc", + expect: []string{"4G>A", "6delinsCC", "9A>T"}, + }, } { c.Log(trial) var vars []string -- 2.30.2