import (
"fmt"
+ "strings"
"time"
"github.com/sergi/go-diff/diffmatchpatch"
}
func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
+ out = make([]diffmatchpatch.Diff, 0, len(in))
for i := 0; i < len(in); i++ {
d := in[i]
+ // Merge consecutive entries of same type (e.g.,
+ // "insert A; insert B")
for i < len(in)-1 && in[i].Type == in[i+1].Type {
d.Text += in[i+1].Text
i++
}
out = append(out, d)
}
+ in, out = out, make([]diffmatchpatch.Diff, 0, len(in))
+ for i := 0; i < len(in); i++ {
+ d := in[i]
+ // diffmatchpatch solves diff("AAX","XTX") with
+ // [delAA,=X,insTX] but we prefer to spell it
+ // [delAA,insXT,=X].
+ //
+ // So, when we see a [del,=,ins] sequence where the
+ // "=" part is a suffix of the "ins" part -- e.g.,
+ // [delAAA,=CGG,insTTTCGG] -- we rearrange it to the
+ // equivalent spelling [delAAA,insCGGTTT,=CGG].
+ if i < len(in)-2 &&
+ d.Type == diffmatchpatch.DiffDelete &&
+ in[i+1].Type == diffmatchpatch.DiffEqual &&
+ in[i+2].Type == diffmatchpatch.DiffInsert &&
+ strings.HasSuffix(in[i+2].Text, in[i+1].Text) {
+ eq, ins := in[i+1], in[i+2]
+ ins.Text = eq.Text + ins.Text[:len(ins.Text)-len(eq.Text)]
+ in[i+1] = ins
+ in[i+2] = eq
+ }
+ out = append(out, d)
+ }
return
}
package hgvs
import (
+ "strings"
"testing"
"gopkg.in/check.v1"
{
a: "aaaaaaaaaa",
b: "aaaaCaaaaa",
- expect: []string{"5a>C"},
+ expect: []string{"5A>C"},
},
{
a: "aaaacGcaaa",
b: "aaCCttttttC",
expect: []string{"3_4delinsCC", "7_8del", "12_13insC"},
},
+ {
+ // without cleanup, diffmatchpatch solves this as {"3del", "=A", "4_5insA"}
+ a: "aggaggggg",
+ b: "agAaggggg",
+ expect: []string{"3G>A"},
+ },
+ {
+ // without cleanup, diffmatchpatch solves this as {"3_4del", "=A", "5_6insAA"}
+ a: "agggaggggg",
+ b: "agAAaggggg",
+ expect: []string{"3_4delinsAA"},
+ },
+ {
+ // without cleanup, diffmatchpatch solves this as {"3_4del", "=A", "5_6insCA"}
+ a: "agggaggggg",
+ b: "agACaggggg",
+ expect: []string{"3_4delinsAC"},
+ },
+ {
+ // without cleanup, diffmatchpatch solves this as {"3_7del", "=A", "8_9insAAACA"}
+ a: "aggggggaggggg",
+ b: "agAAAACaggggg",
+ expect: []string{"3_7delinsAAAAC"},
+ },
+ {
+ // without cleanup, diffmatchpatch solves this as {"3_7del", "=AAAA", "11_12insCAAAA"}
+ a: "aggggggaaaaggggg",
+ b: "agAAAACaaaaggggg",
+ expect: []string{"3_7delinsAAAAC"},
+ },
+ {
+ a: "agggaggggg",
+ b: "agCAaggggg",
+ expect: []string{"3_4delinsCA"},
+ },
+ {
+ a: "agggg",
+ b: "agAAg",
+ expect: []string{"3_4delinsAA"},
+ },
} {
c.Log(trial)
var vars []string
- diffs, _ := Diff(trial.a, trial.b, 0)
+ diffs, _ := Diff(strings.ToUpper(trial.a), strings.ToUpper(trial.b), 0)
for _, v := range diffs {
vars = append(vars, v.String())
}