Call 2-base deletion-insertion as two adjacent SNPs.
[lightning.git] / hgvs / diff.go
index 47deab7246eb45af5b5caf935f4cea0c12cf6d47..206cf7e56ceebd8744828a844d499f3cfeb2f89d 100644 (file)
@@ -1,3 +1,7 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package hgvs
 
 import (
@@ -19,6 +23,10 @@ func (v *Variant) String() string {
        switch {
        case len(v.New) == 0 && len(v.Ref) == 0:
                return fmt.Sprintf("%d=", v.Position)
+       case len(v.New) == 1 && v.New == v.Ref:
+               return fmt.Sprintf("%d=", v.Position)
+       case v.New == v.Ref:
+               return fmt.Sprintf("%d_%d=", v.Position, v.Position+len(v.Ref)-1)
        case len(v.New) == 0 && len(v.Ref) == 1:
                return fmt.Sprintf("%ddel", v.Position)
        case len(v.New) == 0:
@@ -85,6 +93,17 @@ func Diff(a, b string, timeout time.Duration) ([]Variant, bool) {
                                v.New += diffs[i].Text
                        }
                }
+               if len(v.Ref) == 2 && len(v.New) == 2 {
+                       v1 := v
+                       v1.Ref = v1.Ref[:1]
+                       v1.New = v1.New[:1]
+                       v.Ref = v.Ref[1:]
+                       v.New = v.New[1:]
+                       v.Position++
+                       v.Left = v1.Ref
+                       pos++
+                       variants = append(variants, v1)
+               }
                pos += len(v.Ref)
                variants = append(variants, v)
                left = ""
@@ -139,8 +158,63 @@ func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
                        in[i+1].Text+in[i+2].Text == in[i+2].Text+in[i+1].Text {
                        in[i+2], in[i+1] = in[i+1], in[i+2]
                }
+               // when diffmatchpatch says [delAAA, insXAY] and
+               // len(X)==1, we prefer to treat the A>X as a snp.
+               if i < len(in)-1 &&
+                       d.Type == diffmatchpatch.DiffDelete &&
+                       in[i+1].Type == diffmatchpatch.DiffInsert &&
+                       len(d.Text) >= 2 &&
+                       len(in[i+1].Text) >= 2 &&
+                       d.Text[1] == in[i+1].Text[1] {
+                       eqend := 2
+                       for ; eqend < len(d.Text) && eqend < len(in[i+1].Text) && d.Text[eqend] == in[i+1].Text[eqend]; eqend++ {
+                       }
+                       out = append(out,
+                               diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[:1]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[:1]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffEqual, d.Text[1:eqend]})
+                       in[i].Text, in[i+1].Text = in[i].Text[eqend:], in[i+1].Text[eqend:]
+                       i--
+                       continue
+               }
+               // when diffmatchpatch says [delAAA, insXAY] and
+               // len(Y)==1, we prefer to treat the A>Y as a snp.
+               if i < len(in)-1 &&
+                       d.Type == diffmatchpatch.DiffDelete &&
+                       in[i+1].Type == diffmatchpatch.DiffInsert &&
+                       len(d.Text) >= 2 &&
+                       len(in[i+1].Text) >= 2 &&
+                       d.Text[len(d.Text)-2] == in[i+1].Text[len(in[i+1].Text)-2] {
+                       // eqstart will be the number of equal chars
+                       // before the terminal snp, plus 1 for the snp
+                       // itself. Example, for [delAAAA, insTTAAG],
+                       // eqstart will be 3.
+                       eqstart := 2
+                       for ; eqstart < len(d.Text) && eqstart < len(in[i+1].Text) && d.Text[len(d.Text)-eqstart] == in[i+1].Text[len(in[i+1].Text)-eqstart]; eqstart++ {
+                       }
+                       eqstart--
+                       out = append(out,
+                               diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[:len(d.Text)-eqstart]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[:len(in[i+1].Text)-eqstart]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffEqual, d.Text[len(d.Text)-eqstart : len(d.Text)-1]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffDelete, d.Text[len(d.Text)-1:]},
+                               diffmatchpatch.Diff{diffmatchpatch.DiffInsert, in[i+1].Text[len(in[i+1].Text)-1:]})
+                       i++
+                       continue
+               }
                out = append(out, d)
        }
+       // for i := 0; i < len(out)-1; i++ {
+       //      if out[i].Type == diffmatchpatch.DiffDelete && len(out[i].Text) == 2 &&
+       //              out[i+1].Type == diffmatchpatch.DiffInsert && len(out[i+1].Text) == 2 {
+       //              out = append(out, diffmatchpatch.Diff{}, diffmatchpatch.Diff{})
+       //              copy(out[i+4:], out[i+2:])
+       //              out[i+2] = diffmatchpatch.Diff{diffmatchpatch.DiffDelete, out[i].Text[1:]}
+       //              out[i+3] = diffmatchpatch.Diff{diffmatchpatch.DiffInsert, out[i+1].Text[1:]}
+       //              out[i].Text = out[i].Text[:1]
+       //              out[i+1].Text = out[i+1].Text[:1]
+       //      }
+       // }
        return
 }