Fix diff case
author Tom Clegg <tom@curii.com>
Wed, 27 Jul 2022 18:48:05 +0000 (14:48 -0400)
committer Tom Clegg <tom@curii.com>
Wed, 27 Jul 2022 18:48:05 +0000 (14:48 -0400)
refs #19236 #note-15.2, #note-15.3

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

hgvs/diff.go
hgvs/diff_test.go

index c217949ee41e41d46429aa52d987da09c88edf6f..ed991405248be9bbeeb1e3cf1e6d2c1222b0bd38 100644 (file)
@@ -175,6 +175,37 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                        in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text {
                        in[i+2], in[i+1] = in[i+1], in[i+2]
                }
+               // Likewise, diffmatchpatch solves
+               // diff("XXXA","XXAA") with [delX,=XXA,insA], but we
+               // prefer [=XX,delX,insA,=A].
+               if i < len(in)-2 &&
+                       d.Type == diffmatchpatch.DiffDelete &&
+                       in[i+1].Type == diffmatchpatch.DiffEqual &&
+                       in[i+2].Type == diffmatchpatch.DiffInsert {
+                       redo := false
+                       for x := len(d.Text); x <= len(in[i+1].Text)-len(in[i+2].Text); x++ {
+                               // d  in[i+1]  in[i+2]
+                               // x  xxx aaa  a
+                               //       ^
+                               // x  xx
+                               //    xxx
+                               //        aaa
+                               //         aa  a
+                               if d.Text+in[i+1].Text[:x-len(d.Text)] == in[i+1].Text[:x] &&
+                                       in[i+1].Text[x:] == in[i+1].Text[x+len(in[i+2].Text):]+in[i+2].Text {
+                                       out = append(out, diffmatchpatch.Diff{diffmatchpatch.DiffEqual, d.Text + in[i+1].Text[:x-len(d.Text)]})
+                                       in[i], in[i+1], in[i+2] = diffmatchpatch.Diff{diffmatchpatch.DiffDelete, in[i+1].Text[x-len(d.Text) : x]},
+                                               diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[x : x+len(in[i+2].Text)]},
+                                               diffmatchpatch.Diff{diffmatchpatch.DiffEqual, in[i+1].Text[x+len(in[i+2].Text):] + in[i+2].Text}
+                                       redo = true
+                                       break
+                               }
+                       }
+                       if redo {
+                               i--
+                               continue
+                       }
+               }
                // when diffmatchpatch says [delAAA, insXAY] and
                // len(X)==1, we prefer to treat the A>X as a snp.
                if i < len(in)-1 &&
index 3cac5db020eedd2c86969ec25854c774950132ec..717e7046cbff38c6ff70a056db153f8b896d45ec 100644 (file)
@@ -197,6 +197,21 @@ func (s *diffSuite) TestDiff(c *check.C) {
                        b:      "agg",
                        expect: []string{"3_15del"},
                },
+               {
+                       a:      "aggGac",
+                       b:      "aggAac",
+                       expect: []string{"4G>A"},
+               },
+               {
+                       a:      "atttTc",
+                       b:      "atttCc",
+                       expect: []string{"5T>C"},
+               },
+               {
+                       a:      "atatataTAcgcgaa",
+                       b:      "atatataCGcgcgaa",
+                       expect: []string{"8T>C", "9A>G"},
+               },
        } {
                c.Log(trial)
                var vars []string