Fix HGVS diff: GGAA>AAAA is GG>AA, not delGG,=AA,insAA.
[lightning.git] / ref2genome.go
1 package lightning
2
3 import (
4         "bufio"
5         "bytes"
6         "compress/gzip"
7         "errors"
8         "flag"
9         "fmt"
10         "io"
11         "net/http"
12         _ "net/http/pprof"
13         "os"
14         "strings"
15
16         "git.arvados.org/arvados.git/sdk/go/arvados"
17         log "github.com/sirupsen/logrus"
18 )
19
// ref2genome holds the command line options of the "ref2genome"
// subcommand. The fields are populated from flags by RunCommand.
type ref2genome struct {
	// refFile is the reference fasta file (-ref). Required. A name
	// ending in ".gz" is read through a gzip decompressor.
	refFile string
	// projectUUID is the project UUID for containers and output data
	// (-project). Only used when not running locally.
	projectUUID string
	// outputFilename is the output filename (-o). Empty means write
	// to stdout. It must be empty when not running locally.
	outputFilename string
	// runLocal selects running on the local host (-local) instead of
	// in an arvados container.
	runLocal bool
}
26
27 func (cmd *ref2genome) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
28         var err error
29         defer func() {
30                 if err != nil {
31                         fmt.Fprintf(stderr, "%s\n", err)
32                 }
33         }()
34         flags := flag.NewFlagSet("", flag.ContinueOnError)
35         flags.SetOutput(stderr)
36         flags.StringVar(&cmd.refFile, "ref", "", "reference fasta `file`")
37         flags.StringVar(&cmd.projectUUID, "project", "", "project `UUID` for containers and output data")
38         flags.StringVar(&cmd.outputFilename, "o", "", "output filename")
39         flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)")
40         priority := flags.Int("priority", 500, "container request priority")
41         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
42         err = flags.Parse(args)
43         if err == flag.ErrHelp {
44                 err = nil
45                 return 0
46         } else if err != nil {
47                 return 2
48         } else if cmd.refFile == "" {
49                 err = errors.New("reference data (-ref) not specified")
50                 return 2
51         }
52
53         if *pprof != "" {
54                 go func() {
55                         log.Println(http.ListenAndServe(*pprof, nil))
56                 }()
57         }
58
59         if !cmd.runLocal {
60                 if cmd.outputFilename != "" {
61                         err = errors.New("cannot specify output filename in non-local mode")
62                         return 2
63                 }
64                 runner := arvadosContainerRunner{
65                         Name:        "lightning ref2genome",
66                         Client:      arvados.NewClientFromEnv(),
67                         ProjectUUID: cmd.projectUUID,
68                         RAM:         1 << 30,
69                         Priority:    *priority,
70                         VCPUs:       1,
71                 }
72                 err = runner.TranslatePaths(&cmd.refFile)
73                 if err != nil {
74                         return 1
75                 }
76                 runner.Args = []string{"ref2genome", "-local=true", "-ref", cmd.refFile, "-o", "/mnt/output/ref.genome"}
77                 var output string
78                 output, err = runner.Run()
79                 if err != nil {
80                         return 1
81                 }
82                 fmt.Fprintln(stdout, output+"/ref.genome")
83                 return 0
84         }
85
86         var out io.WriteCloser
87         if cmd.outputFilename == "" {
88                 out = nopCloser{stdout}
89         } else {
90                 out, err = os.OpenFile(cmd.outputFilename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
91                 if err != nil {
92                         return 1
93                 }
94         }
95         f, err := os.Open(cmd.refFile)
96         if err != nil {
97                 return 1
98         }
99         defer f.Close()
100         var in io.Reader
101         if strings.HasSuffix(cmd.refFile, ".gz") {
102                 in, err = gzip.NewReader(f)
103                 if err != nil {
104                         return 1
105                 }
106         } else {
107                 in = f
108         }
109         label, seqlen := "", 0
110         scanner := bufio.NewScanner(in)
111         for scanner.Scan() {
112                 buf := scanner.Bytes()
113                 if len(buf) > 0 && buf[0] == '>' {
114                         if label != "" {
115                                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
116                         }
117                         label = strings.TrimSpace(string(buf[1:]))
118                         label = strings.SplitN(label, " ", 2)[0]
119                         seqlen = 0
120                 } else {
121                         seqlen += len(bytes.TrimSpace(buf))
122                 }
123         }
124         if label != "" {
125                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
126         }
127         if err = scanner.Err(); err != nil {
128                 return 1
129         }
130         if err = out.Close(); err != nil {
131                 return 1
132         }
133         return 0
134 }