"build-docker-image": &buildDockerImage{},
"pca": &pythonPCA{},
"plot": &pythonPlot{},
+ "diff-fasta": &diffFasta{},
})
)
--- /dev/null
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/curii/lightning/hgvs"
+)
+
+type diffFasta struct{}
+
+func (cmd *diffFasta) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+ var err error
+ defer func() {
+ if err != nil {
+ fmt.Fprintf(stderr, "%s\n", err)
+ }
+ }()
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(stderr)
+ offset := flags.Int("offset", 0, "coordinate offset")
+ sequence := flags.String("sequence", "chr1", "sequence label")
+ err = flags.Parse(args)
+ if err == flag.ErrHelp {
+ err = nil
+ return 0
+ } else if err != nil {
+ return 2
+ }
+ if len(flags.Args()) != 2 {
+ err = fmt.Errorf("usage: %s [options] a.fasta b.fasta", prog)
+ return 2
+ }
+
+ var fasta [2][]byte
+ errs := make(chan error, 2)
+ for idx, fnm := range flags.Args() {
+ idx, fnm := idx, fnm
+ go func() {
+ f, err := os.Open(fnm)
+ if err != nil {
+ errs <- err
+ return
+ }
+ defer f.Close()
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ buf := scanner.Bytes()
+ if len(buf) > 0 && buf[0] != '>' {
+ fasta[idx] = append(fasta[idx], bytes.ToUpper(buf)...)
+ }
+ }
+ errs <- scanner.Err()
+ }()
+ }
+ for range flags.Args() {
+ if err = <-errs; err != nil {
+ return 1
+ }
+ }
+
+ variants := hgvs.Diff(string(fasta[0]), string(fasta[1]))
+ if *offset != 0 {
+ for i := range variants {
+ variants[i].Position += *offset
+ }
+ }
+ for _, v := range variants {
+ fmt.Fprintf(stdout, "%s:g.%s\t%s\t%d\t%s\t%s\n", *sequence, v.String(), *sequence, v.Position, v.Ref, v.New)
+ }
+ return 0
+}
--- /dev/null
+package main
+
+import (
+ "bytes"
+ "io/ioutil"
+ "os"
+
+ "gopkg.in/check.v1"
+)
+
+type diffSuite struct{}
+
+var _ = check.Suite(&diffSuite{})
+
+func (s *diffSuite) TestDiff(c *check.C) {
+ tempdir, err := ioutil.TempDir("", "")
+ c.Assert(err, check.IsNil)
+ defer os.RemoveAll(tempdir)
+
+ err = ioutil.WriteFile(tempdir+"/f1.fa", []byte(">f1\nactgactCacgtacgt\nactgactgacgAAcgt\n"), 0700)
+ c.Assert(err, check.IsNil)
+ err = ioutil.WriteFile(tempdir+"/f2.fa", []byte(">f2\nactgactGacgtacgt\nactgactgacgTTcgtA\n"), 0700)
+ c.Assert(err, check.IsNil)
+
+ var output bytes.Buffer
+ exited := (&diffFasta{}).RunCommand("diff-fasta", []string{"-sequence", "chr2", "-offset", "1000", tempdir + "/f1.fa", tempdir + "/f2.fa"}, nil, &output, os.Stderr)
+ c.Check(exited, check.Equals, 0)
+ c.Check("\n"+output.String(), check.Equals, `
+chr2:g.1008C>G chr2 1008 C G
+chr2:g.1028_1029delinsTT chr2 1028 AA TT
+chr2:g.1032_1033insA chr2 1033 A
+`)
+}
github.com/kshedden/gonpy v0.0.0-20190510000443-66c21fac4672
github.com/prometheus/client_golang v1.5.0 // indirect
github.com/prometheus/procfs v0.0.10 // indirect
+ github.com/sergi/go-diff v1.0.0
github.com/sirupsen/logrus v1.4.2
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
github.com/prometheus/procfs v0.0.10 h1:QJQN3jYQhkamO4mhfUWqdDH2asK7ONOI9MTWjyAxNKM=
github.com/prometheus/procfs v0.0.10/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/satori/go.uuid v1.2.1-0.20180103174451-36e9d2ebbde5/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
+github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
--- /dev/null
+package hgvs
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/sergi/go-diff/diffmatchpatch"
+)
+
+type Variant struct {
+ Position int
+ Ref string
+ New string
+}
+
+func (v *Variant) String() string {
+ switch {
+ case len(v.New) == 0 && len(v.Ref) == 1:
+ return fmt.Sprintf("%ddel", v.Position)
+ case len(v.New) == 0:
+ return fmt.Sprintf("%d_%ddel", v.Position, v.Position+len(v.Ref)-1)
+ case len(v.Ref) == 1 && len(v.New) == 1:
+ return fmt.Sprintf("%d%s>%s", v.Position, v.Ref, v.New)
+ case len(v.Ref) == 0:
+ return fmt.Sprintf("%d_%dins%s", v.Position-1, v.Position, v.New)
+ case len(v.Ref) == 1 && len(v.New) > 0:
+ return fmt.Sprintf("%ddelins%s", v.Position, v.New)
+ default:
+ return fmt.Sprintf("%d_%ddelins%s", v.Position, v.Position+len(v.Ref)-1, v.New)
+ }
+}
+
+func Diff(a, b string) []Variant {
+ dmp := diffmatchpatch.New()
+ diffs := cleanup(dmp.DiffCleanupEfficiency(dmp.DiffBisect(a, b, time.Time{})))
+ pos := 1
+ var variants []Variant
+ for i := 0; i < len(diffs); i++ {
+ switch diffs[i].Type {
+ case diffmatchpatch.DiffEqual:
+ pos += len(diffs[i].Text)
+ case diffmatchpatch.DiffDelete:
+ if i+1 < len(diffs) && diffs[i+1].Type == diffmatchpatch.DiffInsert {
+ // deletion followed by insertion
+ variants = append(variants, Variant{Position: pos, Ref: diffs[i].Text, New: diffs[i+1].Text})
+ pos += len(diffs[i].Text)
+ i++
+ } else {
+ variants = append(variants, Variant{Position: pos, Ref: diffs[i].Text})
+ pos += len(diffs[i].Text)
+ }
+ case diffmatchpatch.DiffInsert:
+ if i+1 < len(diffs) && diffs[i+1].Type == diffmatchpatch.DiffDelete {
+ // insertion followed by deletion
+ variants = append(variants, Variant{Position: pos, Ref: diffs[i+1].Text, New: diffs[i].Text})
+ pos += len(diffs[i+1].Text)
+ i++
+ } else {
+ variants = append(variants, Variant{Position: pos, New: diffs[i].Text})
+ }
+ }
+ }
+ return variants
+}
+
+func cleanup(in []diffmatchpatch.Diff) (out []diffmatchpatch.Diff) {
+ for i := 0; i < len(in); i++ {
+ d := in[i]
+ for i < len(in)-1 && in[i].Type == in[i+1].Type {
+ d.Text += in[i+1].Text
+ i++
+ }
+ out = append(out, d)
+ }
+ return
+}
--- /dev/null
+package hgvs
+
+import (
+ "testing"
+
+ "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { check.TestingT(t) }
+
+type diffSuite struct{}
+
+var _ = check.Suite(&diffSuite{})
+
+func (s *diffSuite) TestDiff(c *check.C) {
+ for _, trial := range []struct {
+ a string
+ b string
+ expect []string
+ }{
+ {
+ a: "aaaaaaaaaa",
+ b: "aaaaCaaaaa",
+ expect: []string{"5a>C"},
+ },
+ {
+ a: "aaaacGcaaa",
+ b: "aaaaccaaa",
+ expect: []string{"6del"},
+ },
+ {
+ a: "aaaacGGcaaa",
+ b: "aaaaccaaa",
+ expect: []string{"6_7del"},
+ },
+ {
+ a: "aaaac",
+ b: "aaaa",
+ expect: []string{"5del"},
+ },
+ {
+ a: "aaaa",
+ b: "aaCaa",
+ expect: []string{"2_3insC"},
+ },
+ {
+ a: "aaGGGtt",
+ b: "aaCCCtt",
+ expect: []string{"3_5delinsCCC"},
+ },
+ {
+ a: "aa",
+ b: "aaCCC",
+ expect: []string{"2_3insCCC"},
+ },
+ {
+ a: "aaGGttAAtttt",
+ b: "aaCCttttttC",
+ expect: []string{"3_4delinsCC", "7_8del", "12_13insC"},
+ },
+ } {
+ c.Log(trial)
+ var vars []string
+ for _, v := range Diff(trial.a, trial.b) {
+ vars = append(vars, v.String())
+ }
+ c.Check(vars, check.DeepEquals, trial.expect)
+ }
+}