From d59f4efde5eece7d3b3ea965895397c8ad4c936e Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 24 Feb 2022 10:50:13 -0500 Subject: [PATCH] Fix left-most diff cases. refs #18721 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- hgvs/diff.go | 44 +++++++++++++++++++++++--------------------- hgvs/diff_test.go | 32 +++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/hgvs/diff.go b/hgvs/diff.go index 780cfacf10..e2b73439bf 100644 --- a/hgvs/diff.go +++ b/hgvs/diff.go @@ -126,6 +126,23 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) { in, out = out, make([]diffmatchpatch.Diff, 0, len(in)) for i := 0; i < len(in); i++ { d := in[i] + // when diffmatchpatch says [=yyyyXXXX, delX, =zzz], + // we really want [=yyyy, delX, =XXXXzzz] (ditto for + // ins instead of del) + if i < len(in)-2 && + d.Type == diffmatchpatch.DiffEqual && + in[i+1].Type != diffmatchpatch.DiffEqual && + in[i+2].Type == diffmatchpatch.DiffEqual && + len(in[i+1].Text) <= len(d.Text) { + for cut := 0; cut < len(d.Text)-len(in[i+1].Text); cut++ { + if d.Text[cut:] == d.Text[cut+len(in[i+1].Text):]+in[i+1].Text { + in[i+2].Text = d.Text[cut+len(in[i+1].Text):] + in[i+1].Text + in[i+2].Text + in[i+1].Text = d.Text[cut : cut+len(in[i+1].Text)] + d.Text = d.Text[:cut] + break + } + } + } // diffmatchpatch solves diff("AAX","XTX") with // [delAA,=X,insTX] but we prefer to spell it // [delAA,insXT,=X]. @@ -144,27 +161,6 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) { in[i+1] = ins in[i+2] = eq } - // when diffmatchpatch says [=yyyyXXXX, delX, =zzz], - // we really want [=yyyy, delX, =XXXXzzz] (ditto for - // ins instead of del) - if i < len(in)-2 && - d.Type == diffmatchpatch.DiffEqual && - in[i+1].Type != diffmatchpatch.DiffEqual && - in[i+2].Type == diffmatchpatch.DiffEqual && - len(in[i+1].Text) <= len(d.Text) { - for cut := 0; cut < len(d.Text); cut++ { - skip := strings.Index(d.Text[cut:], in[i+1].Text) - if skip < 0 { - break - } - cut += skip - if d.Text[cut:]+in[i+1].Text == in[i+1].Text+d.Text[cut:] { - in[i+2].Text = d.Text[cut:] + in[i+2].Text - d.Text = d.Text[:cut] - break - } - } - } // diffmatchpatch solves diff("AXX","XXX") with // [delA,=XX,insX] but we prefer to spell it // [delA,insX,=XX]. @@ -225,6 +221,12 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) { } out = append(out, d) } + in, out = out, make([]diffmatchpatch.Diff, 0, len(in)) + for _, d := range in { + if len(d.Text) > 0 { + out = append(out, d) + } + } // for i := 0; i < len(out)-1; i++ { // if out[i].Type == diffmatchpatch.DiffDelete && len(out[i].Text) == 2 && // out[i+1].Type == diffmatchpatch.DiffInsert && len(out[i+1].Text) == 2 { diff --git a/hgvs/diff_test.go b/hgvs/diff_test.go index 10acdb772c..2778dc8f55 100644 --- a/hgvs/diff_test.go +++ b/hgvs/diff_test.go @@ -154,6 +154,12 @@ func (s *diffSuite) TestDiff(c *check.C) { b: "acgacatttacac", expect: []string{"7_8del"}, }, + { + // should delete leftmost + a: "acgacATatatacac", + b: "acgacatatacac", + expect: []string{"6_7del"}, + }, { // should insert leftmost a: "acgacatttacac", @@ -161,14 +167,30 @@ func (s *diffSuite) TestDiff(c *check.C) { expect: []string{"6_7insTT"}, }, { - a: "ccccaGATAtat", - b: "ccccatat", + // should insert leftmost + a: "acgacatatacac", + b: "acgacATatatacac", + expect: []string{"5_6insAT"}, + }, + { + a: "cccacGATAtatcc", + b: "cccactatcc", expect: []string{"6_9del"}, }, { - a: "aGATAtat", - b: "atat", - expect: []string{"2_5del"}, + a: "acGATAtatcc", + b: "actatcc", + expect: []string{"3_6del"}, + }, + { + a: "acTTTTTatcc", + b: "acGTTTatcc", + expect: []string{"3_4delinsG"}, + }, + { + a: "acTTTTatcc", + b: "acGTTTTTatcc", + expect: []string{"2_3insGT"}, }, } { c.Log(trial) -- 2.30.2