Fix missed error check.
[lightning.git] / stats.go
1 package main
2
3 import (
4         "bufio"
5         "encoding/gob"
6         "encoding/json"
7         "errors"
8         "flag"
9         "fmt"
10         "io"
11         "io/ioutil"
12         "net/http"
13         _ "net/http/pprof"
14         "os"
15
16         "git.arvados.org/arvados.git/sdk/go/arvados"
17         log "github.com/sirupsen/logrus"
18 )
19
// stats implements the "stats" subcommand, which summarizes a stream of
// gob-encoded library entries as a JSON report.
type stats struct {
	// debugUnplaced is bound to the -debug-unplaced command line flag.
	// When true, the report also lists every tag that was never placed.
	debugUnplaced bool
}
23
24 func (cmd *stats) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
25         var err error
26         defer func() {
27                 if err != nil {
28                         fmt.Fprintf(stderr, "%s\n", err)
29                 }
30         }()
31         flags := flag.NewFlagSet("", flag.ContinueOnError)
32         flags.SetOutput(stderr)
33         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
34         runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
35         projectUUID := flags.String("project", "", "project `UUID` for output data")
36         priority := flags.Int("priority", 500, "container request priority")
37         inputFilename := flags.String("i", "-", "input `file`")
38         outputFilename := flags.String("o", "-", "output `file`")
39         flags.BoolVar(&cmd.debugUnplaced, "debug-unplaced", false, "output full list of unplaced tags")
40         err = flags.Parse(args)
41         if err == flag.ErrHelp {
42                 err = nil
43                 return 0
44         } else if err != nil {
45                 return 2
46         }
47
48         if *pprof != "" {
49                 go func() {
50                         log.Println(http.ListenAndServe(*pprof, nil))
51                 }()
52         }
53
54         if !*runlocal {
55                 if *outputFilename != "-" {
56                         err = errors.New("cannot specify output file in container mode: not implemented")
57                         return 1
58                 }
59                 runner := arvadosContainerRunner{
60                         Name:        "lightning stats",
61                         Client:      arvados.NewClientFromEnv(),
62                         ProjectUUID: *projectUUID,
63                         RAM:         16000000000,
64                         VCPUs:       1,
65                         Priority:    *priority,
66                 }
67                 err = runner.TranslatePaths(inputFilename)
68                 if err != nil {
69                         return 1
70                 }
71                 runner.Args = []string{"stats", "-local=true", fmt.Sprintf("-debug-unplaced=%v", cmd.debugUnplaced), "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
72                 var output string
73                 output, err = runner.Run()
74                 if err != nil {
75                         return 1
76                 }
77                 fmt.Fprintln(stdout, output+"/stats.json")
78                 return 0
79         }
80
81         var input io.ReadCloser
82         if *inputFilename == "-" {
83                 input = ioutil.NopCloser(stdin)
84         } else {
85                 input, err = os.Open(*inputFilename)
86                 if err != nil {
87                         return 1
88                 }
89                 defer input.Close()
90         }
91
92         var output io.WriteCloser
93         if *outputFilename == "-" {
94                 output = nopCloser{stdout}
95         } else {
96                 output, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
97                 if err != nil {
98                         return 1
99                 }
100                 defer output.Close()
101         }
102
103         bufw := bufio.NewWriter(output)
104         err = cmd.doStats(input, bufw)
105         if err != nil {
106                 return 1
107         }
108         err = bufw.Flush()
109         if err != nil {
110                 return 1
111         }
112         err = output.Close()
113         if err != nil {
114                 return 1
115         }
116         return 0
117 }
118
119 func (cmd *stats) doStats(input io.Reader, output io.Writer) error {
120         var ret struct {
121                 Genomes          int
122                 Tags             int
123                 TagsPlacedNTimes []int // a[x]==y means there were y tags that placed x times
124                 TileVariants     int
125                 VariantsBySize   []int
126                 NCVariantsBySize []int
127                 UnplacedTags     []string `json:",omitempty"`
128         }
129
130         var tagSet [][]byte
131         var tagPlacements []int
132         dec := gob.NewDecoder(bufio.NewReaderSize(input, 1<<26))
133         for {
134                 var ent LibraryEntry
135                 err := dec.Decode(&ent)
136                 if err == io.EOF {
137                         break
138                 } else if err != nil {
139                         return err
140                 }
141                 ret.Genomes += len(ent.CompactGenomes)
142                 ret.TileVariants += len(ent.TileVariants)
143                 if len(ent.TagSet) > 0 {
144                         if ret.Tags > 0 {
145                                 return errors.New("invalid input: contains multiple tagsets")
146                         }
147                         ret.Tags = len(ent.TagSet)
148                         tagSet = ent.TagSet
149                 }
150                 for _, g := range ent.CompactGenomes {
151                         if need := (len(g.Variants)+1)/2 - len(tagPlacements); need > 0 {
152                                 tagPlacements = append(tagPlacements, make([]int, need)...)
153                         }
154                         for idx, v := range g.Variants {
155                                 if v > 0 {
156                                         tagPlacements[idx/2]++
157                                 }
158                         }
159                 }
160                 for _, tv := range ent.TileVariants {
161                         if need := 1 + len(tv.Sequence) - len(ret.VariantsBySize); need > 0 {
162                                 ret.VariantsBySize = append(ret.VariantsBySize, make([]int, need)...)
163                                 ret.NCVariantsBySize = append(ret.NCVariantsBySize, make([]int, need)...)
164                         }
165
166                         hasNoCalls := false
167                         for _, b := range tv.Sequence {
168                                 if b != 'a' && b != 'c' && b != 'g' && b != 't' {
169                                         hasNoCalls = true
170                                 }
171                         }
172
173                         if hasNoCalls {
174                                 ret.NCVariantsBySize[len(tv.Sequence)]++
175                         } else {
176                                 ret.VariantsBySize[len(tv.Sequence)]++
177                         }
178                 }
179         }
180         for id, p := range tagPlacements {
181                 for len(ret.TagsPlacedNTimes) <= p {
182                         ret.TagsPlacedNTimes = append(ret.TagsPlacedNTimes, 0)
183                 }
184                 ret.TagsPlacedNTimes[p]++
185                 if cmd.debugUnplaced && p == 0 {
186                         ret.UnplacedTags = append(ret.UnplacedTags, fmt.Sprintf("%d %s", id, tagSet[id]))
187                 }
188         }
189
190         return json.NewEncoder(output).Encode(ret)
191 }