Add anno2vcf command.
[lightning.git] / slicenumpy.go
index c48a5d54a9ce508d7137f24a078168363a2e40dc..c1aeae6589d33570c4437cdcd59032e17acf6088 100644 (file)
@@ -13,6 +13,7 @@ import (
        "net/http"
        _ "net/http/pprof"
        "os"
+       "regexp"
        "runtime"
        "sort"
        "strings"
@@ -21,7 +22,6 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/arvados/lightning/hgvs"
        "github.com/kshedden/gonpy"
-       "github.com/sirupsen/logrus"
        log "github.com/sirupsen/logrus"
        "golang.org/x/crypto/blake2b"
 )
@@ -116,6 +116,12 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                return 1
        }
 
+       matchGenome, err := regexp.Compile(cmd.filter.MatchGenome)
+       if err != nil {
+               err = fmt.Errorf("-match-genome: invalid regexp: %q", cmd.filter.MatchGenome)
+               return 1
+       }
+
        taglen := -1
        DecodeLibrary(in0, strings.HasSuffix(infiles[0], ".gz"), func(ent *LibraryEntry) error {
                if len(ent.TagSet) > 0 {
@@ -127,7 +133,9 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                        }
                }
                for _, cg := range ent.CompactGenomes {
-                       cgnames = append(cgnames, cg.Name)
+                       if matchGenome.MatchString(cg.Name) {
+                               cgnames = append(cgnames, cg.Name)
+                       }
                }
                for _, tv := range ent.TileVariants {
                        if tv.Ref {
@@ -148,6 +156,10 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                err = fmt.Errorf("tagset not found")
                return 1
        }
+       if len(cgnames) == 0 {
+               err = fmt.Errorf("no genomes found matching regexp %q", cmd.filter.MatchGenome)
+               return 1
+       }
        sort.Strings(cgnames)
 
        {
@@ -244,7 +256,9 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                        seq[tv.Tag] = variants
                                }
                                for _, cg := range ent.CompactGenomes {
-                                       cgs[cg.Name] = cg
+                                       if matchGenome.MatchString(cg.Name) {
+                                               cgs[cg.Name] = cg
+                                       }
                                }
                                return nil
                        })
@@ -335,7 +349,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                        diffs, _ := hgvs.Diff(reftilestr, strings.ToUpper(string(tv.Sequence)), 0)
                                        for _, diff := range diffs {
                                                diff.Position += rt.pos
-                                               fmt.Fprintf(annow, "%d,%d,%d,%s:g.%s\n", tag, outcol, remap[v], rt.seqname, diff.String())
+                                               fmt.Fprintf(annow, "%d,%d,%d,%s:g.%s,%s,%d,%s,%s\n", tag, outcol, remap[v], rt.seqname, diff.String(), rt.seqname, diff.Position, diff.Ref, diff.New)
                                        }
                                }
                        }
@@ -377,7 +391,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                        if err != nil {
                                return err
                        }
-                       log.WithFields(logrus.Fields{
+                       log.WithFields(log.Fields{
                                "filename": fnm,
                                "rows":     rows,
                                "cols":     cols,