Fix left-most diff cases.
author Tom Clegg <tom@curii.com>
Thu, 24 Feb 2022 15:50:13 +0000 (10:50 -0500)
committer Tom Clegg <tom@curii.com>
Thu, 24 Feb 2022 15:50:13 +0000 (10:50 -0500)
refs #18721

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

hgvs/diff.go
hgvs/diff_test.go

index 780cfacf10c1d3ba3b802f8c249fce4d1eef6031..e2b73439bf9d316e44afdc55005c05a0b1036e6f 100644 (file)
@@ -126,6 +126,23 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
        in, out = out, make([]diffmatchpatch.Diff, 0, len(in))
        for i := 0; i < len(in); i++ {
                d := in[i]
+               // when diffmatchpatch says [=yyyyXXXX, delX, =zzz],
+               // we really want [=yyyy, delX, =XXXXzzz] (ditto for
+               // ins instead of del)
+               if i < len(in)-2 &&
+                       d.Type == diffmatchpatch.DiffEqual &&
+                       in[i+1].Type != diffmatchpatch.DiffEqual &&
+                       in[i+2].Type == diffmatchpatch.DiffEqual &&
+                       len(in[i+1].Text) <= len(d.Text) {
+                       for cut := 0; cut < len(d.Text)-len(in[i+1].Text); cut++ {
+                               if d.Text[cut:] == d.Text[cut+len(in[i+1].Text):]+in[i+1].Text {
+                                       in[i+2].Text = d.Text[cut+len(in[i+1].Text):] + in[i+1].Text + in[i+2].Text
+                                       in[i+1].Text = d.Text[cut : cut+len(in[i+1].Text)]
+                                       d.Text = d.Text[:cut]
+                                       break
+                               }
+                       }
+               }
                // diffmatchpatch solves diff("AAX","XTX") with
                // [delAA,=X,insTX] but we prefer to spell it
                // [delAA,insXT,=X].
@@ -144,27 +161,6 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                        in[i+1] = ins
                        in[i+2] = eq
                }
-               // when diffmatchpatch says [=yyyyXXXX, delX, =zzz],
-               // we really want [=yyyy, delX, =XXXXzzz] (ditto for
-               // ins instead of del)
-               if i < len(in)-2 &&
-                       d.Type == diffmatchpatch.DiffEqual &&
-                       in[i+1].Type != diffmatchpatch.DiffEqual &&
-                       in[i+2].Type == diffmatchpatch.DiffEqual &&
-                       len(in[i+1].Text) <= len(d.Text) {
-                       for cut := 0; cut < len(d.Text); cut++ {
-                               skip := strings.Index(d.Text[cut:], in[i+1].Text)
-                               if skip < 0 {
-                                       break
-                               }
-                               cut += skip
-                               if d.Text[cut:]+in[i+1].Text == in[i+1].Text+d.Text[cut:] {
-                                       in[i+2].Text = d.Text[cut:] + in[i+2].Text
-                                       d.Text = d.Text[:cut]
-                                       break
-                               }
-                       }
-               }
                // diffmatchpatch solves diff("AXX","XXX") with
                // [delA,=XX,insX] but we prefer to spell it
                // [delA,insX,=XX].
@@ -225,6 +221,12 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                }
                out = append(out, d)
        }
+       in, out = out, make([]diffmatchpatch.Diff, 0, len(in))
+       for _, d := range in {
+               if len(d.Text) > 0 {
+                       out = append(out, d)
+               }
+       }
        // for i := 0; i < len(out)-1; i++ {
        //      if out[i].Type == diffmatchpatch.DiffDelete && len(out[i].Text) == 2 &&
        //              out[i+1].Type == diffmatchpatch.DiffInsert && len(out[i+1].Text) == 2 {
index 10acdb772c64c65fad369e8abad0a33a3afc15cf..2778dc8f553e2ce5f4964f9b0d0fb59c2efed1ab 100644 (file)
@@ -154,6 +154,12 @@ func (s *diffSuite) TestDiff(c *check.C) {
                        b:      "acgacatttacac",
                        expect: []string{"7_8del"},
                },
+               {
+                       // should delete leftmost
+                       a:      "acgacATatatacac",
+                       b:      "acgacatatacac",
+                       expect: []string{"6_7del"},
+               },
                {
                        // should insert leftmost
                        a:      "acgacatttacac",
@@ -161,14 +167,30 @@ func (s *diffSuite) TestDiff(c *check.C) {
                        expect: []string{"6_7insTT"},
                },
                {
-                       a:      "ccccaGATAtat",
-                       b:      "ccccatat",
+                       // should insert leftmost
+                       a:      "acgacatatacac",
+                       b:      "acgacATatatacac",
+                       expect: []string{"5_6insAT"},
+               },
+               {
+                       a:      "cccacGATAtatcc",
+                       b:      "cccactatcc",
                        expect: []string{"6_9del"},
                },
                {
-                       a:      "aGATAtat",
-                       b:      "atat",
-                       expect: []string{"2_5del"},
+                       a:      "acGATAtatcc",
+                       b:      "actatcc",
+                       expect: []string{"3_6del"},
+               },
+               {
+                       a:      "acTTTTTatcc",
+                       b:      "acGTTTatcc",
+                       expect: []string{"3_4delinsG"},
+               },
+               {
+                       a:      "acTTTTatcc",
+                       b:      "acGTTTTTatcc",
+                       expect: []string{"2_3insGT"},
                },
        } {
                c.Log(trial)