47deab7246eb45af5b5caf935f4cea0c12cf6d47
[lightning.git] / hgvs / diff.go
1 package hgvs
2
3 import (
4         "fmt"
5         "strings"
6         "time"
7
8         "github.com/sergi/go-diff/diffmatchpatch"
9 )
10
11 type Variant struct {
12         Position int
13         Ref      string
14         New      string
15         Left     string // base preceding an indel, if Ref or New is empty
16 }
17
18 func (v *Variant) String() string {
19         switch {
20         case len(v.New) == 0 && len(v.Ref) == 0:
21                 return fmt.Sprintf("%d=", v.Position)
22         case len(v.New) == 0 && len(v.Ref) == 1:
23                 return fmt.Sprintf("%ddel", v.Position)
24         case len(v.New) == 0:
25                 return fmt.Sprintf("%d_%ddel", v.Position, v.Position+len(v.Ref)-1)
26         case len(v.Ref) == 1 && len(v.New) == 1:
27                 return fmt.Sprintf("%d%s>%s", v.Position, v.Ref, v.New)
28         case len(v.Ref) == 0:
29                 return fmt.Sprintf("%d_%dins%s", v.Position-1, v.Position, v.New)
30         case len(v.Ref) == 1 && len(v.New) > 0:
31                 return fmt.Sprintf("%ddelins%s", v.Position, v.New)
32         default:
33                 return fmt.Sprintf("%d_%ddelins%s", v.Position, v.Position+len(v.Ref)-1, v.New)
34         }
35 }
36
37 // PadLeft returns a Variant that is equivalent to v but (if possible)
38 // uses the stashed preceding base (the Left field) to avoid having a
39 // non-empty Ref or New part, even for an insertion or deletion.
40 //
41 // For example, if v is {Position: 45, Ref: "", New: "A"}, PadLeft
42 // might return {Position: 44, Ref: "T", New: "TA"}.
43 func (v *Variant) PadLeft() Variant {
44         if len(v.Ref) == 0 || len(v.New) == 0 {
45                 return Variant{
46                         Position: v.Position - len(v.Left),
47                         Ref:      v.Left + v.Ref,
48                         New:      v.Left + v.New,
49                 }
50         } else {
51                 return *v
52         }
53 }
54
55 func Diff(a, b string, timeout time.Duration) ([]Variant, bool) {
56         dmp := diffmatchpatch.New()
57         var deadline time.Time
58         if timeout > 0 {
59                 deadline = time.Now().Add(timeout)
60         }
61         diffs := dmp.DiffBisect(a, b, deadline)
62         timedOut := false
63         if timeout > 0 && time.Now().After(deadline) {
64                 timedOut = true
65         }
66         diffs = cleanup(dmp.DiffCleanupEfficiency(diffs))
67         pos := 1
68         var variants []Variant
69         for i := 0; i < len(diffs); {
70                 left := "" // last char before an insertion or deletion
71                 for ; i < len(diffs) && diffs[i].Type == diffmatchpatch.DiffEqual; i++ {
72                         pos += len(diffs[i].Text)
73                         if tlen := len(diffs[i].Text); tlen > 0 {
74                                 left = diffs[i].Text[tlen-1:]
75                         }
76                 }
77                 if i >= len(diffs) {
78                         break
79                 }
80                 v := Variant{Position: pos, Left: left}
81                 for ; i < len(diffs) && diffs[i].Type != diffmatchpatch.DiffEqual; i++ {
82                         if diffs[i].Type == diffmatchpatch.DiffDelete {
83                                 v.Ref += diffs[i].Text
84                         } else {
85                                 v.New += diffs[i].Text
86                         }
87                 }
88                 pos += len(v.Ref)
89                 variants = append(variants, v)
90                 left = ""
91         }
92         return variants, timedOut
93 }
94
95 func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
96         out = make([]diffmatchpatch.Diff, 0, len(in))
97         for i := 0; i < len(in); i++ {
98                 d := in[i]
99                 // Merge consecutive entries of same type (e.g.,
100                 // "insert A; insert B")
101                 for i < len(in)-1 && in[i].Type == in[i+1].Type {
102                         d.Text += in[i+1].Text
103                         i++
104                 }
105                 out = append(out, d)
106         }
107         in, out = out, make([]diffmatchpatch.Diff, 0, len(in))
108         for i := 0; i < len(in); i++ {
109                 d := in[i]
110                 // diffmatchpatch solves diff("AAX","XTX") with
111                 // [delAA,=X,insTX] but we prefer to spell it
112                 // [delAA,insXT,=X].
113                 //
114                 // So, when we see a [del,=,ins] sequence where the
115                 // "=" part is a suffix of the "ins" part -- e.g.,
116                 // [delAAA,=CGG,insTTTCGG] -- we rearrange it to the
117                 // equivalent spelling [delAAA,insCGGTTT,=CGG].
118                 if i < len(in)-2 &&
119                         d.Type == diffmatchpatch.DiffDelete &&
120                         in[i+1].Type == diffmatchpatch.DiffEqual &&
121                         in[i+2].Type == diffmatchpatch.DiffInsert &&
122                         strings.HasSuffix(in[i+2].Text, in[i+1].Text) {
123                         eq, ins := in[i+1], in[i+2]
124                         ins.Text = eq.Text + ins.Text[:len(ins.Text)-len(eq.Text)]
125                         in[i+1] = ins
126                         in[i+2] = eq
127                 }
128                 // diffmatchpatch solves diff("AXX","XXX") with
129                 // [delA,=XX,insX] but we prefer to spell it
130                 // [delA,insX,=XX].
131                 //
132                 // So, when we see a [del,=,ins] sequence that has the
133                 // same effect after swapping the "=" and "ins" parts,
134                 // we swap them.
135                 if i < len(in)-2 &&
136                         d.Type == diffmatchpatch.DiffDelete &&
137                         in[i+1].Type == diffmatchpatch.DiffEqual &&
138                         in[i+2].Type == diffmatchpatch.DiffInsert &&
139                         in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text {
140                         in[i+2], in[i+1] = in[i+1], in[i+2]
141                 }
142                 out = append(out, d)
143         }
144         return
145 }
146
147 func Less(a, b Variant) bool {
148         if a.Position != b.Position {
149                 return a.Position < b.Position
150         } else if a.New != b.New {
151                 return a.New < b.New
152         } else {
153                 return a.Ref < b.Ref
154         }
155 }