Export hgvs-onehot.
[lightning.git] / hgvs / diff.go
1 package hgvs
2
3 import (
4         "fmt"
5         "strings"
6         "time"
7
8         "github.com/sergi/go-diff/diffmatchpatch"
9 )
10
11 type Variant struct {
12         Position int
13         Ref      string
14         New      string
15         Left     string // base preceding an indel, if Ref or New is empty
16 }
17
18 func (v *Variant) String() string {
19         switch {
20         case len(v.New) == 0 && len(v.Ref) == 0:
21                 return fmt.Sprintf("%d=", v.Position)
22         case len(v.New) == 0 && len(v.Ref) == 1:
23                 return fmt.Sprintf("%ddel", v.Position)
24         case len(v.New) == 0:
25                 return fmt.Sprintf("%d_%ddel", v.Position, v.Position+len(v.Ref)-1)
26         case len(v.Ref) == 1 && len(v.New) == 1:
27                 return fmt.Sprintf("%d%s>%s", v.Position, v.Ref, v.New)
28         case len(v.Ref) == 0:
29                 return fmt.Sprintf("%d_%dins%s", v.Position-1, v.Position, v.New)
30         case len(v.Ref) == 1 && len(v.New) > 0:
31                 return fmt.Sprintf("%ddelins%s", v.Position, v.New)
32         default:
33                 return fmt.Sprintf("%d_%ddelins%s", v.Position, v.Position+len(v.Ref)-1, v.New)
34         }
35 }
36
37 // PadLeft returns a Variant that is equivalent to v but (if possible)
38 // uses the stashed preceding base (the Left field) to avoid having a
39 // non-empty Ref or New part, even for an insertion or deletion.
40 //
41 // For example, if v is {Position: 45, Ref: "", New: "A"}, PadLeft
42 // might return {Position: 44, Ref: "T", New: "TA"}.
43 func (v *Variant) PadLeft() Variant {
44         if len(v.Ref) == 0 || len(v.New) == 0 {
45                 return Variant{
46                         Position: v.Position - len(v.Left),
47                         Ref:      v.Left + v.Ref,
48                         New:      v.Left + v.New,
49                 }
50         } else {
51                 return *v
52         }
53 }
54
55 func Diff(a, b string, timeout time.Duration) ([]Variant, bool) {
56         dmp := diffmatchpatch.New()
57         var deadline time.Time
58         if timeout > 0 {
59                 deadline = time.Now().Add(timeout)
60         }
61         diffs := dmp.DiffBisect(a, b, deadline)
62         timedOut := false
63         if timeout > 0 && time.Now().After(deadline) {
64                 timedOut = true
65         }
66         diffs = cleanup(dmp.DiffCleanupEfficiency(diffs))
67         pos := 1
68         var variants []Variant
69         for i := 0; i < len(diffs); {
70                 left := "" // last char before an insertion or deletion
71                 for ; i < len(diffs) && diffs[i].Type == diffmatchpatch.DiffEqual; i++ {
72                         pos += len(diffs[i].Text)
73                         if tlen := len(diffs[i].Text); tlen > 0 {
74                                 left = diffs[i].Text[tlen-1:]
75                         }
76                 }
77                 if i >= len(diffs) {
78                         break
79                 }
80                 v := Variant{Position: pos, Left: left}
81                 for ; i < len(diffs) && diffs[i].Type != diffmatchpatch.DiffEqual; i++ {
82                         if diffs[i].Type == diffmatchpatch.DiffDelete {
83                                 v.Ref += diffs[i].Text
84                         } else {
85                                 v.New += diffs[i].Text
86                         }
87                 }
88                 pos += len(v.Ref)
89                 variants = append(variants, v)
90                 left = ""
91         }
92         return variants, timedOut
93 }
94
95 func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
96         out = make([]diffmatchpatch.Diff, 0, len(in))
97         for i := 0; i < len(in); i++ {
98                 d := in[i]
99                 // Merge consecutive entries of same type (e.g.,
100                 // "insert A; insert B")
101                 for i < len(in)-1 && in[i].Type == in[i+1].Type {
102                         d.Text += in[i+1].Text
103                         i++
104                 }
105                 out = append(out, d)
106         }
107         in, out = out, make([]diffmatchpatch.Diff, 0, len(in))
108         for i := 0; i < len(in); i++ {
109                 d := in[i]
110                 // diffmatchpatch solves diff("AAX","XTX") with
111                 // [delAA,=X,insTX] but we prefer to spell it
112                 // [delAA,insXT,=X].
113                 //
114                 // So, when we see a [del,=,ins] sequence where the
115                 // "=" part is a suffix of the "ins" part -- e.g.,
116                 // [delAAA,=CGG,insTTTCGG] -- we rearrange it to the
117                 // equivalent spelling [delAAA,insCGGTTT,=CGG].
118                 if i < len(in)-2 &&
119                         d.Type == diffmatchpatch.DiffDelete &&
120                         in[i+1].Type == diffmatchpatch.DiffEqual &&
121                         in[i+2].Type == diffmatchpatch.DiffInsert &&
122                         strings.HasSuffix(in[i+2].Text, in[i+1].Text) {
123                         eq, ins := in[i+1], in[i+2]
124                         ins.Text = eq.Text + ins.Text[:len(ins.Text)-len(eq.Text)]
125                         in[i+1] = ins
126                         in[i+2] = eq
127                 }
128                 out = append(out, d)
129         }
130         return
131 }
132
133 func Less(a, b Variant) bool {
134         if a.Position != b.Position {
135                 return a.Position < b.Position
136         } else if a.New != b.New {
137                 return a.New < b.New
138         } else {
139                 return a.Ref < b.Ref
140         }
141 }