Don't use tags that appear more than once per sequence.
[lightning.git] / ref2genome.go
1 // Copyright (C) The Lightning Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lightning
6
7 import (
8         "bufio"
9         "bytes"
10         "compress/gzip"
11         "errors"
12         "flag"
13         "fmt"
14         "io"
15         "net/http"
16         _ "net/http/pprof"
17         "os"
18         "strings"
19
20         "git.arvados.org/arvados.git/sdk/go/arvados"
21         log "github.com/sirupsen/logrus"
22 )
23
// ref2genome carries the settings of the "ref2genome" subcommand.
// RunCommand fills in every field from command-line flags.
type ref2genome struct {
	// refFile is the reference FASTA file given with -ref.
	refFile string

	// projectUUID is the -project value handed to the container
	// runner when not running locally.
	projectUUID string

	// outputFilename is the -o value; RunCommand writes to stdout
	// when it is empty.
	outputFilename string

	// runLocal is the -local value: do the work in this process
	// rather than submitting a container.
	runLocal bool
}
30
31 func (cmd *ref2genome) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
32         var err error
33         defer func() {
34                 if err != nil {
35                         fmt.Fprintf(stderr, "%s\n", err)
36                 }
37         }()
38         flags := flag.NewFlagSet("", flag.ContinueOnError)
39         flags.SetOutput(stderr)
40         flags.StringVar(&cmd.refFile, "ref", "", "reference fasta `file`")
41         flags.StringVar(&cmd.projectUUID, "project", "", "project `UUID` for containers and output data")
42         flags.StringVar(&cmd.outputFilename, "o", "", "output filename")
43         flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)")
44         priority := flags.Int("priority", 500, "container request priority")
45         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
46         err = flags.Parse(args)
47         if err == flag.ErrHelp {
48                 err = nil
49                 return 0
50         } else if err != nil {
51                 return 2
52         } else if cmd.refFile == "" {
53                 err = errors.New("reference data (-ref) not specified")
54                 return 2
55         }
56
57         if *pprof != "" {
58                 go func() {
59                         log.Println(http.ListenAndServe(*pprof, nil))
60                 }()
61         }
62
63         if !cmd.runLocal {
64                 if cmd.outputFilename != "" {
65                         err = errors.New("cannot specify output filename in non-local mode")
66                         return 2
67                 }
68                 runner := arvadosContainerRunner{
69                         Name:        "lightning ref2genome",
70                         Client:      arvados.NewClientFromEnv(),
71                         ProjectUUID: cmd.projectUUID,
72                         RAM:         1 << 30,
73                         Priority:    *priority,
74                         VCPUs:       1,
75                 }
76                 err = runner.TranslatePaths(&cmd.refFile)
77                 if err != nil {
78                         return 1
79                 }
80                 runner.Args = []string{"ref2genome", "-local=true", "-ref", cmd.refFile, "-o", "/mnt/output/ref.genome"}
81                 var output string
82                 output, err = runner.Run()
83                 if err != nil {
84                         return 1
85                 }
86                 fmt.Fprintln(stdout, output+"/ref.genome")
87                 return 0
88         }
89
90         var out io.WriteCloser
91         if cmd.outputFilename == "" {
92                 out = nopCloser{stdout}
93         } else {
94                 out, err = os.OpenFile(cmd.outputFilename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
95                 if err != nil {
96                         return 1
97                 }
98         }
99         f, err := os.Open(cmd.refFile)
100         if err != nil {
101                 return 1
102         }
103         defer f.Close()
104         var in io.Reader
105         if strings.HasSuffix(cmd.refFile, ".gz") {
106                 in, err = gzip.NewReader(f)
107                 if err != nil {
108                         return 1
109                 }
110         } else {
111                 in = f
112         }
113         label, seqlen := "", 0
114         scanner := bufio.NewScanner(in)
115         for scanner.Scan() {
116                 buf := scanner.Bytes()
117                 if len(buf) > 0 && buf[0] == '>' {
118                         if label != "" {
119                                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
120                         }
121                         label = strings.TrimSpace(string(buf[1:]))
122                         label = strings.SplitN(label, " ", 2)[0]
123                         seqlen = 0
124                 } else {
125                         seqlen += len(bytes.TrimSpace(buf))
126                 }
127         }
128         if label != "" {
129                 fmt.Fprintf(out, "%s\t%d\n", label, seqlen)
130         }
131         if err = scanner.Err(); err != nil {
132                 return 1
133         }
134         if err = out.Close(); err != nil {
135                 return 1
136         }
137         return 0
138 }