Fix another category of misspelled hgvs diff.
author Tom Clegg <tom@tomclegg.ca>
Thu, 3 Jun 2021 20:27:44 +0000 (16:27 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Thu, 3 Jun 2021 20:27:44 +0000 (16:27 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

hgvs/diff.go
hgvs/diff_test.go

index e04ccff58736bde066d8d935e4c9b4b9c28b21b6..47deab7246eb45af5b5caf935f4cea0c12cf6d47 100644 (file)
@@ -125,6 +125,20 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                        in[i+1] = ins
                        in[i+2] = eq
                }
+               // diffmatchpatch solves diff("AXX","XXX") with
+               // [delA,=XX,insX] but we prefer to spell it
+               // [delA,insX,=XX].
+               //
+               // So, when we see a [del,=,ins] sequence that has the
+               // same effect after swapping the "=" and "ins" parts,
+               // we swap them.
+               if i < len(in)-2 &&
+                       d.Type == diffmatchpatch.DiffDelete &&
+                       in[i+1].Type == diffmatchpatch.DiffEqual &&
+                       in[i+2].Type == diffmatchpatch.DiffInsert &&
+                       in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text {
+                       in[i+2], in[i+1] = in[i+1], in[i+2]
+               }
                out = append(out, d)
        }
        return
index 20043ca31a6d1926b53a20cf0083ebeff73c2fcd..9a4e35531c64ab654ee2e1c8bb2993e924c39a2e 100644 (file)
@@ -104,6 +104,11 @@ func (s *diffSuite) TestDiff(c *check.C) {
                        b:      "acTtgaa",
                        expect: []string{"3G>T"},
                },
+               {
+                       a:      "tcagaagac",
+                       b:      "tcaAaagac",
+                       expect: []string{"4G>A"},
+               },
        } {
                c.Log(trial)
                var vars []string