Generate genome file from fasta.
[lightning.git] / ref2genome.go
1 package main
2
3 import (
4         "bufio"
5         "bytes"
6         "compress/gzip"
7         "errors"
8         "flag"
9         "fmt"
10         "io"
11         "net/http"
12         _ "net/http/pprof"
13         "os"
14         "strings"
15
16         "git.arvados.org/arvados.git/sdk/go/arvados"
17         log "github.com/sirupsen/logrus"
18 )
19
// ref2genome holds the command line options of the "ref2genome"
// subcommand. The fields are populated from flags by RunCommand.
type ref2genome struct {
	// Path of the reference fasta file (-ref). A ".gz" suffix
	// selects gzip decompression.
	refFile string

	// Project UUID for containers and output data (-project).
	projectUUID string

	// Output filename (-o). Empty means write to stdout; only
	// accepted together with -local.
	outputFilename string

	// Run on the local host instead of in an arvados container
	// (-local).
	runLocal bool
}
26
27 func (cmd *ref2genome) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
28         var err error
29         defer func() {
30                 if err != nil {
31                         fmt.Fprintf(stderr, "%s\n", err)
32                 }
33         }()
34         flags := flag.NewFlagSet("", flag.ContinueOnError)
35         flags.SetOutput(stderr)
36         flags.StringVar(&cmd.refFile, "ref", "", "reference fasta `file`")
37         flags.StringVar(&cmd.projectUUID, "project", "", "project `UUID` for containers and output data")
38         flags.StringVar(&cmd.outputFilename, "o", "", "output filename")
39         flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)")
40         priority := flags.Int("priority", 500, "container request priority")
41         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
42         err = flags.Parse(args)
43         if err == flag.ErrHelp {
44                 err = nil
45                 return 0
46         } else if err != nil {
47                 return 2
48         } else if cmd.refFile == "" {
49                 err = errors.New("reference data (-ref) not specified")
50                 return 2
51         }
52
53         if *pprof != "" {
54                 go func() {
55                         log.Println(http.ListenAndServe(*pprof, nil))
56                 }()
57         }
58
59         if !cmd.runLocal {
60                 if cmd.outputFilename != "" {
61                         err = errors.New("cannot specify output filename in non-local mode")
62                         return 2
63                 }
64                 runner := arvadosContainerRunner{
65                         Name:        "lightning ref2genome",
66                         Client:      arvados.NewClientFromEnv(),
67                         ProjectUUID: cmd.projectUUID,
68                         RAM:         1 << 30,
69                         Priority:    *priority,
70                 }
71                 err = runner.TranslatePaths(&cmd.refFile)
72                 if err != nil {
73                         return 1
74                 }
75                 runner.Args = []string{"ref2genome", "-local=true", "-ref", cmd.refFile, "-o", "/mnt/output/ref.genome"}
76                 var output string
77                 output, err = runner.Run()
78                 if err != nil {
79                         return 1
80                 }
81                 fmt.Fprintln(stdout, output)
82                 return 0
83         }
84
85         var out io.WriteCloser
86         if cmd.outputFilename == "" {
87                 out = nopCloser{stdout}
88         } else {
89                 out, err = os.OpenFile(cmd.outputFilename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
90                 if err != nil {
91                         return 1
92                 }
93         }
94         f, err := os.Open(cmd.refFile)
95         if err != nil {
96                 return 1
97         }
98         defer f.Close()
99         var in io.Reader
100         if strings.HasSuffix(cmd.refFile, ".gz") {
101                 in, err = gzip.NewReader(f)
102                 if err != nil {
103                         return 1
104                 }
105         } else {
106                 in = f
107         }
108         label, seqlen := "", 0
109         scanner := bufio.NewScanner(in)
110         for scanner.Scan() {
111                 buf := scanner.Bytes()
112                 if len(buf) > 0 && buf[0] == '>' {
113                         if label != "" {
114                                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
115                         }
116                         label = strings.TrimSpace(string(buf[1:]))
117                         seqlen = 0
118                 } else {
119                         seqlen += len(bytes.TrimSpace(buf))
120                 }
121         }
122         if label != "" {
123                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
124         }
125         if err = scanner.Err(); err != nil {
126                 return 1
127         }
128         if err = out.Close(); err != nil {
129                 return 1
130         }
131         return 0
132 }