Less verbose logging.
[lightning.git] / filter.go
1 package main
2
3 import (
4         "bufio"
5         "encoding/gob"
6         "errors"
7         "flag"
8         "fmt"
9         "io"
10         "io/ioutil"
11         "net/http"
12         _ "net/http/pprof"
13         "os"
14
15         "git.arvados.org/arvados.git/sdk/go/arvados"
16         log "github.com/sirupsen/logrus"
17 )
18
// filterer implements the "filter" subcommand; see RunCommand.
type filterer struct {
	// output receives the encoded result when the output file is "-".
	// RunCommand sets it to the stdout writer it was given.
	output io.Writer
}
22
23 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
24         var err error
25         defer func() {
26                 if err != nil {
27                         fmt.Fprintf(stderr, "%s\n", err)
28                 }
29         }()
30         flags := flag.NewFlagSet("", flag.ContinueOnError)
31         flags.SetOutput(stderr)
32         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
33         runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
34         projectUUID := flags.String("project", "", "project `UUID` for output data")
35         inputFilename := flags.String("i", "-", "input `file`")
36         outputFilename := flags.String("o", "-", "output `file`")
37         maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
38         mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
39         maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
40         err = flags.Parse(args)
41         if err == flag.ErrHelp {
42                 err = nil
43                 return 0
44         } else if err != nil {
45                 return 2
46         }
47         cmd.output = stdout
48
49         if *pprof != "" {
50                 go func() {
51                         log.Println(http.ListenAndServe(*pprof, nil))
52                 }()
53         }
54
55         if !*runlocal {
56                 if *outputFilename != "-" {
57                         err = errors.New("cannot specify output file in container mode: not implemented")
58                         return 1
59                 }
60                 runner := arvadosContainerRunner{
61                         Name:        "lightning filter",
62                         Client:      arvados.NewClientFromEnv(),
63                         ProjectUUID: *projectUUID,
64                         RAM:         64000000000,
65                         VCPUs:       2,
66                 }
67                 err = runner.TranslatePaths(inputFilename)
68                 if err != nil {
69                         return 1
70                 }
71                 runner.Args = []string{"filter", "-local=true",
72                         "-i", *inputFilename,
73                         "-o", "/mnt/output/library.gob",
74                         "-max-variants", fmt.Sprintf("%d", *maxvariants),
75                         "-min-coverage", fmt.Sprintf("%f", *mincoverage),
76                         "-max-tag", fmt.Sprintf("%d", *maxtag),
77                 }
78                 var output string
79                 output, err = runner.Run()
80                 if err != nil {
81                         return 1
82                 }
83                 fmt.Fprintln(stdout, output+"/library.gob")
84                 return 0
85         }
86
87         var infile io.ReadCloser
88         if *inputFilename == "-" {
89                 infile = ioutil.NopCloser(stdin)
90         } else {
91                 infile, err = os.Open(*inputFilename)
92                 if err != nil {
93                         return 1
94                 }
95                 defer infile.Close()
96         }
97         log.Print("reading")
98         cgs, err := ReadCompactGenomes(infile)
99         if err != nil {
100                 return 1
101         }
102         err = infile.Close()
103         if err != nil {
104                 return 1
105         }
106         log.Printf("reading done, %d genomes", len(cgs))
107
108         log.Print("filtering")
109         ntags := 0
110         for _, cg := range cgs {
111                 if ntags < len(cg.Variants)/2 {
112                         ntags = len(cg.Variants) / 2
113                 }
114                 if *maxvariants < 0 {
115                         continue
116                 }
117                 maxVariantID := tileVariantID(*maxvariants)
118                 for idx, variant := range cg.Variants {
119                         if variant > maxVariantID {
120                                 for _, cg := range cgs {
121                                         if len(cg.Variants) > idx {
122                                                 cg.Variants[idx & ^1] = 0
123                                                 cg.Variants[idx|1] = 0
124                                         }
125                                 }
126                         }
127                 }
128         }
129
130         if *maxtag >= 0 && ntags > *maxtag {
131                 ntags = *maxtag
132                 for i, cg := range cgs {
133                         if len(cg.Variants) > *maxtag*2 {
134                                 cgs[i].Variants = cg.Variants[:*maxtag*2]
135                         }
136                 }
137         }
138
139         if *mincoverage < 1 {
140                 mincov := int(*mincoverage * float64(len(cgs)*2))
141                 cov := make([]int, ntags)
142                 for _, cg := range cgs {
143                         for idx, variant := range cg.Variants {
144                                 if variant > 0 {
145                                         cov[idx>>1]++
146                                 }
147                         }
148                 }
149                 for tag, c := range cov {
150                         if c < mincov {
151                                 for _, cg := range cgs {
152                                         if len(cg.Variants) > tag*2 {
153                                                 cg.Variants[tag*2] = 0
154                                                 cg.Variants[tag*2+1] = 0
155                                         }
156                                 }
157                         }
158                 }
159         }
160
161         log.Print("filtering done")
162
163         var outfile io.WriteCloser
164         if *outputFilename == "-" {
165                 outfile = nopCloser{cmd.output}
166         } else {
167                 outfile, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
168                 if err != nil {
169                         return 1
170                 }
171                 defer outfile.Close()
172         }
173         w := bufio.NewWriter(outfile)
174         enc := gob.NewEncoder(w)
175         log.Print("writing")
176         err = enc.Encode(LibraryEntry{
177                 CompactGenomes: cgs,
178         })
179         if err != nil {
180                 return 1
181         }
182         log.Print("writing done")
183         err = w.Flush()
184         if err != nil {
185                 return 1
186         }
187         err = outfile.Close()
188         if err != nil {
189                 return 1
190         }
191         return 0
192 }