From 2382e6c62ced70780d4b56bf0569ce849fa3d9f0 Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Thu, 3 Jun 2021 16:27:44 -0400
Subject: [PATCH] Fix another category of misspelled hgvs diff.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---
 hgvs/diff.go      | 14 ++++++++++++++
 hgvs/diff_test.go |  5 +++++
 2 files changed, 19 insertions(+)

diff --git a/hgvs/diff.go b/hgvs/diff.go
index e04ccff587..47deab7246 100644
--- a/hgvs/diff.go
+++ b/hgvs/diff.go
@@ -125,6 +125,20 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
 			in[i+1] = ins
 			in[i+2] = eq
 		}
+		// diffmatchpatch solves diff("AXX","XXX") with
+		// [delA,=XX,insX] but we prefer to spell it
+		// [delA,insX,=XX].
+		//
+		// So, when we see a [del,=,ins] sequence that has the
+		// same effect after swapping the "=" and "ins" parts,
+		// we swap them.
+		if i < len(in)-2 &&
+			d.Type == diffmatchpatch.DiffDelete &&
+			in[i+1].Type == diffmatchpatch.DiffEqual &&
+			in[i+2].Type == diffmatchpatch.DiffInsert &&
+			in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text {
+			in[i+2], in[i+1] = in[i+1], in[i+2]
+		}
 		out = append(out, d)
 	}
 	return
diff --git a/hgvs/diff_test.go b/hgvs/diff_test.go
index 20043ca31a..9a4e35531c 100644
--- a/hgvs/diff_test.go
+++ b/hgvs/diff_test.go
@@ -104,6 +104,11 @@ func (s *diffSuite) TestDiff(c *check.C) {
 			b:      "acTtgaa",
 			expect: []string{"3G>T"},
 		},
+		{
+			a:      "tcagaagac",
+			b:      "tcaAaagac",
+			expect: []string{"4G>A"},
+		},
 	} {
 		c.Log(trial)
 		var vars []string
-- 
2.30.2