14 type filterer struct {
18 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
22 fmt.Fprintf(stderr, "%s\n", err)
25 flags := flag.NewFlagSet("", flag.ContinueOnError)
26 flags.SetOutput(stderr)
27 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
28 maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
29 mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
30 maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
31 err = flags.Parse(args)
32 if err == flag.ErrHelp {
35 } else if err != nil {
42 log.Println(http.ListenAndServe(*pprof, nil))
47 cgs, err := ReadCompactGenomes(stdin)
51 log.Printf("reading done, %d genomes", len(cgs))
53 log.Print("filtering")
55 for _, cg := range cgs {
56 if ntags < len(cg.Variants)/2 {
57 ntags = len(cg.Variants) / 2
59 for idx, variant := range cg.Variants {
60 if int(variant) > *maxvariants {
61 for _, cg := range cgs {
62 if len(cg.Variants) > idx {
63 cg.Variants[idx & ^1] = 0
64 cg.Variants[idx|1] = 0
71 if *maxtag >= 0 && ntags > *maxtag {
73 for i, cg := range cgs {
74 if len(cg.Variants) > *maxtag*2 {
75 cgs[i].Variants = cg.Variants[:*maxtag*2]
81 mincov := int(*mincoverage * float64(len(cgs)*2))
82 cov := make([]int, ntags)
83 for _, cg := range cgs {
84 for idx, variant := range cg.Variants {
90 for tag, c := range cov {
92 for _, cg := range cgs {
93 if len(cg.Variants) > tag*2 {
94 cg.Variants[tag*2] = 0
95 cg.Variants[tag*2+1] = 0
102 log.Print("filtering done")
104 w := bufio.NewWriter(cmd.output)
105 enc := gob.NewEncoder(w)
107 err = enc.Encode(LibraryEntry{
113 log.Print("writing done")