19995: Add -max-frequency filter.
[lightning.git] / ref2genome.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "bufio"
9         "bytes"
10         "compress/gzip"
11         "errors"
12         "flag"
13         "fmt"
14         "io"
15         "net/http"
16         _ "net/http/pprof"
17         "os"
18         "strings"
19
20         "git.arvados.org/arvados.git/sdk/go/arvados"
21         log "github.com/sirupsen/logrus"
22 )
23
// ref2genome holds the command line options of the "ref2genome"
// subcommand. The fields are populated by RunCommand when it parses
// its arguments.
type ref2genome struct {
	// Reference fasta file to read (-ref). A name ending in ".gz"
	// is read through a gzip decompressor.
	refFile string

	// Project UUID for containers and output data (-project).
	projectUUID string

	// Output filename (-o). Empty means write to stdout; only
	// accepted together with -local.
	outputFilename string

	// Run on the local host instead of in an arvados container
	// (-local).
	runLocal bool
}
30
31 func (cmd *ref2genome) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
32         var err error
33         defer func() {
34                 if err != nil {
35                         fmt.Fprintf(stderr, "%s\n", err)
36                 }
37         }()
38         flags := flag.NewFlagSet("", flag.ContinueOnError)
39         flags.SetOutput(stderr)
40         flags.StringVar(&cmd.refFile, "ref", "", "reference fasta `file`")
41         flags.StringVar(&cmd.projectUUID, "project", "", "project `UUID` for containers and output data")
42         flags.StringVar(&cmd.outputFilename, "o", "", "output filename")
43         flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)")
44         priority := flags.Int("priority", 500, "container request priority")
45         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
46         err = flags.Parse(args)
47         if err == flag.ErrHelp {
48                 err = nil
49                 return 0
50         } else if err != nil {
51                 return 2
52         } else if cmd.refFile == "" {
53                 err = errors.New("reference data (-ref) not specified")
54                 return 2
55         } else if flags.NArg() > 0 {
56                 err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
57                 return 2
58         }
59
60         if *pprof != "" {
61                 go func() {
62                         log.Println(http.ListenAndServe(*pprof, nil))
63                 }()
64         }
65
66         if !cmd.runLocal {
67                 if cmd.outputFilename != "" {
68                         err = errors.New("cannot specify output filename in non-local mode")
69                         return 2
70                 }
71                 runner := arvadosContainerRunner{
72                         Name:        "lightning ref2genome",
73                         Client:      arvados.NewClientFromEnv(),
74                         ProjectUUID: cmd.projectUUID,
75                         RAM:         1 << 30,
76                         Priority:    *priority,
77                         VCPUs:       1,
78                 }
79                 err = runner.TranslatePaths(&cmd.refFile)
80                 if err != nil {
81                         return 1
82                 }
83                 runner.Args = []string{"ref2genome", "-local=true", "-ref", cmd.refFile, "-o", "/mnt/output/ref.genome"}
84                 var output string
85                 output, err = runner.Run()
86                 if err != nil {
87                         return 1
88                 }
89                 fmt.Fprintln(stdout, output+"/ref.genome")
90                 return 0
91         }
92
93         var out io.WriteCloser
94         if cmd.outputFilename == "" {
95                 out = nopCloser{stdout}
96         } else {
97                 out, err = os.OpenFile(cmd.outputFilename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
98                 if err != nil {
99                         return 1
100                 }
101         }
102         f, err := os.Open(cmd.refFile)
103         if err != nil {
104                 return 1
105         }
106         defer f.Close()
107         var in io.Reader
108         if strings.HasSuffix(cmd.refFile, ".gz") {
109                 in, err = gzip.NewReader(f)
110                 if err != nil {
111                         return 1
112                 }
113         } else {
114                 in = f
115         }
116         label, seqlen := "", 0
117         scanner := bufio.NewScanner(in)
118         for scanner.Scan() {
119                 buf := scanner.Bytes()
120                 if len(buf) > 0 && buf[0] == '>' {
121                         if label != "" {
122                                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
123                         }
124                         label = strings.TrimSpace(string(buf[1:]))
125                         label = strings.SplitN(label, " ", 2)[0]
126                         seqlen = 0
127                 } else {
128                         seqlen += len(bytes.TrimSpace(buf))
129                 }
130         }
131         if label != "" {
132                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
133         }
134         if err = scanner.Err(); err != nil {
135                 return 1
136         }
137         if err = out.Close(); err != nil {
138                 return 1
139         }
140         return 0
141 }