Fix left-most diff cases.
author: Tom Clegg <tom@curii.com>
Fri, 18 Feb 2022 20:11:19 +0000 (15:11 -0500)
committer: Tom Clegg <tom@curii.com>
Fri, 18 Feb 2022 20:11:19 +0000 (15:11 -0500)
refs #18721

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

hgvs/diff.go
hgvs/diff_test.go

index 206cf7e56ceebd8744828a844d499f3cfeb2f89d..780cfacf10c1d3ba3b802f8c249fce4d1eef6031 100644 (file)
@@ -144,6 +144,27 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                        in[i+1] = ins
                        in[i+2] = eq
                }
                        in[i+1] = ins
                        in[i+2] = eq
                }
+               // when diffmatchpatch says [=yyyyXXXX, delX, =zzz],
+               // we really want [=yyyy, delX, =XXXXzzz] (ditto for
+               // ins instead of del)
+               if i < len(in)-2 &&
+                       d.Type == diffmatchpatch.DiffEqual &&
+                       in[i+1].Type != diffmatchpatch.DiffEqual &&
+                       in[i+2].Type == diffmatchpatch.DiffEqual &&
+                       len(in[i+1].Text) <= len(d.Text) {
+                       for cut := 0; cut < len(d.Text); cut++ {
+                               skip := strings.Index(d.Text[cut:], in[i+1].Text)
+                               if skip < 0 {
+                                       break
+                               }
+                               cut += skip
+                               if d.Text[cut:]+in[i+1].Text == in[i+1].Text+d.Text[cut:] {
+                                       in[i+2].Text = d.Text[cut:] + in[i+2].Text
+                                       d.Text = d.Text[:cut]
+                                       break
+                               }
+                       }
+               }
                // diffmatchpatch solves diff("AXX","XXX") with
                // [delA,=XX,insX] but we prefer to spell it
                // [delA,insX,=XX].
                // diffmatchpatch solves diff("AXX","XXX") with
                // [delA,=XX,insX] but we prefer to spell it
                // [delA,insX,=XX].
index a5e861e35a4b605851616417147f05001dec4553..10acdb772c64c65fad369e8abad0a33a3afc15cf 100644 (file)
@@ -148,6 +148,28 @@ func (s *diffSuite) TestDiff(c *check.C) {
                        b:      "tcGCcggac",
                        expect: []string{"3A>G", "4T>C", "7del"},
                },
                        b:      "tcGCcggac",
                        expect: []string{"3A>G", "4T>C", "7del"},
                },
+               {
+                       // should delete leftmost
+                       a:      "acgacaTTtttacac",
+                       b:      "acgacatttacac",
+                       expect: []string{"7_8del"},
+               },
+               {
+                       // should insert leftmost
+                       a:      "acgacatttacac",
+                       b:      "acgacaTTtttacac",
+                       expect: []string{"6_7insTT"},
+               },
+               {
+                       a:      "ccccaGATAtat",
+                       b:      "ccccatat",
+                       expect: []string{"6_9del"},
+               },
+               {
+                       a:      "aGATAtat",
+                       b:      "atat",
+                       expect: []string{"2_5del"},
+               },
        } {
                c.Log(trial)
                var vars []string
        } {
                c.Log(trial)
                var vars []string