Add filter subcommand.
[lightning.git] / filter.go
1 package main
2
3 import (
4         "bufio"
5         "encoding/gob"
6         "flag"
7         "fmt"
8         "io"
9         "log"
10         "net/http"
11         _ "net/http/pprof"
12 )
13
14 type filterer struct {
15         output io.Writer
16 }
17
18 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
19         var err error
20         defer func() {
21                 if err != nil {
22                         fmt.Fprintf(stderr, "%s\n", err)
23                 }
24         }()
25         flags := flag.NewFlagSet("", flag.ContinueOnError)
26         flags.SetOutput(stderr)
27         pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
28         maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
29         mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
30         maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
31         err = flags.Parse(args)
32         if err == flag.ErrHelp {
33                 err = nil
34                 return 0
35         } else if err != nil {
36                 return 2
37         }
38         cmd.output = stdout
39
40         if *pprof != "" {
41                 go func() {
42                         log.Println(http.ListenAndServe(*pprof, nil))
43                 }()
44         }
45
46         log.Print("reading")
47         cgs, err := ReadCompactGenomes(stdin)
48         if err != nil {
49                 return 1
50         }
51         log.Printf("reading done, %d genomes", len(cgs))
52
53         log.Print("filtering")
54         ntags := 0
55         for _, cg := range cgs {
56                 if ntags < len(cg.Variants)/2 {
57                         ntags = len(cg.Variants) / 2
58                 }
59                 for idx, variant := range cg.Variants {
60                         if int(variant) > *maxvariants {
61                                 for _, cg := range cgs {
62                                         if len(cg.Variants) > idx {
63                                                 cg.Variants[idx & ^1] = 0
64                                                 cg.Variants[idx|1] = 0
65                                         }
66                                 }
67                         }
68                 }
69         }
70
71         if *maxtag >= 0 && ntags > *maxtag {
72                 ntags = *maxtag
73                 for i, cg := range cgs {
74                         if len(cg.Variants) > *maxtag*2 {
75                                 cgs[i].Variants = cg.Variants[:*maxtag*2]
76                         }
77                 }
78         }
79
80         if *mincoverage < 1 {
81                 mincov := int(*mincoverage * float64(len(cgs)*2))
82                 cov := make([]int, ntags)
83                 for _, cg := range cgs {
84                         for idx, variant := range cg.Variants {
85                                 if variant > 0 {
86                                         cov[idx>>1]++
87                                 }
88                         }
89                 }
90                 for tag, c := range cov {
91                         if c < mincov {
92                                 for _, cg := range cgs {
93                                         if len(cg.Variants) > tag*2 {
94                                                 cg.Variants[tag*2] = 0
95                                                 cg.Variants[tag*2+1] = 0
96                                         }
97                                 }
98                         }
99                 }
100         }
101
102         log.Print("filtering done")
103
104         w := bufio.NewWriter(cmd.output)
105         enc := gob.NewEncoder(w)
106         log.Print("writing")
107         err = enc.Encode(LibraryEntry{
108                 CompactGenomes: cgs,
109         })
110         if err != nil {
111                 return 1
112         }
113         log.Print("writing done")
114         err = w.Flush()
115         if err != nil {
116                 return 1
117         }
118         return 0
119 }