Plot import stats.
[lightning.git] / hgvs / diff.go
1 package hgvs
2
3 import (
4         "fmt"
5         "time"
6
7         "github.com/sergi/go-diff/diffmatchpatch"
8 )
9
10 type Variant struct {
11         Position int
12         Ref      string
13         New      string
14         Left     string // base preceding an indel, if Ref or New is empty
15 }
16
17 func (v *Variant) String() string {
18         switch {
19         case len(v.New) == 0 && len(v.Ref) == 0:
20                 return fmt.Sprintf("%d=", v.Position)
21         case len(v.New) == 0 && len(v.Ref) == 1:
22                 return fmt.Sprintf("%ddel", v.Position)
23         case len(v.New) == 0:
24                 return fmt.Sprintf("%d_%ddel", v.Position, v.Position+len(v.Ref)-1)
25         case len(v.Ref) == 1 && len(v.New) == 1:
26                 return fmt.Sprintf("%d%s>%s", v.Position, v.Ref, v.New)
27         case len(v.Ref) == 0:
28                 return fmt.Sprintf("%d_%dins%s", v.Position-1, v.Position, v.New)
29         case len(v.Ref) == 1 && len(v.New) > 0:
30                 return fmt.Sprintf("%ddelins%s", v.Position, v.New)
31         default:
32                 return fmt.Sprintf("%d_%ddelins%s", v.Position, v.Position+len(v.Ref)-1, v.New)
33         }
34 }
35
36 // PadLeft returns a Variant that is equivalent to v but (if possible)
37 // uses the stashed preceding base (the Left field) to avoid having a
38 // non-empty Ref or New part, even for an insertion or deletion.
39 //
40 // For example, if v is {Position: 45, Ref: "", New: "A"}, PadLeft
41 // might return {Position: 44, Ref: "T", New: "TA"}.
42 func (v *Variant) PadLeft() Variant {
43         if len(v.Ref) == 0 || len(v.New) == 0 {
44                 return Variant{
45                         Position: v.Position - len(v.Left),
46                         Ref:      v.Left + v.Ref,
47                         New:      v.Left + v.New,
48                 }
49         } else {
50                 return *v
51         }
52 }
53
54 func Diff(a, b string, timeout time.Duration) ([]Variant, bool) {
55         dmp := diffmatchpatch.New()
56         var deadline time.Time
57         if timeout > 0 {
58                 deadline = time.Now().Add(timeout)
59         }
60         diffs := dmp.DiffBisect(a, b, deadline)
61         timedOut := false
62         if timeout > 0 && time.Now().After(deadline) {
63                 timedOut = true
64         }
65         diffs = cleanup(dmp.DiffCleanupEfficiency(diffs))
66         pos := 1
67         var variants []Variant
68         for i := 0; i < len(diffs); {
69                 left := "" // last char before an insertion or deletion
70                 for ; i < len(diffs) && diffs[i].Type == diffmatchpatch.DiffEqual; i++ {
71                         pos += len(diffs[i].Text)
72                         if tlen := len(diffs[i].Text); tlen > 0 {
73                                 left = diffs[i].Text[tlen-1:]
74                         }
75                 }
76                 if i >= len(diffs) {
77                         break
78                 }
79                 v := Variant{Position: pos, Left: left}
80                 for ; i < len(diffs) && diffs[i].Type != diffmatchpatch.DiffEqual; i++ {
81                         if diffs[i].Type == diffmatchpatch.DiffDelete {
82                                 v.Ref += diffs[i].Text
83                         } else {
84                                 v.New += diffs[i].Text
85                         }
86                 }
87                 pos += len(v.Ref)
88                 variants = append(variants, v)
89                 left = ""
90         }
91         return variants, timedOut
92 }
93
94 func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
95         for i := 0; i < len(in); i++ {
96                 d := in[i]
97                 for i < len(in)-1 && in[i].Type == in[i+1].Type {
98                         d.Text += in[i+1].Text
99                         i++
100                 }
101                 out = append(out, d)
102         }
103         return
104 }