14 type filterer struct {
18 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
22 fmt.Fprintf(stderr, "%s\n", err)
25 flags := flag.NewFlagSet("", flag.ContinueOnError)
26 flags.SetOutput(stderr)
27 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
28 maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
29 mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
30 maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
31 err = flags.Parse(args)
32 if err == flag.ErrHelp {
35 } else if err != nil {
42 log.Println(http.ListenAndServe(*pprof, nil))
47 cgs, err := ReadCompactGenomes(stdin)
51 log.Printf("reading done, %d genomes", len(cgs))
53 log.Print("filtering")
55 for _, cg := range cgs {
56 if ntags < len(cg.Variants)/2 {
57 ntags = len(cg.Variants) / 2
62 maxVariantID := tileVariantID(*maxvariants)
63 for idx, variant := range cg.Variants {
64 if variant > maxVariantID {
65 for _, cg := range cgs {
66 if len(cg.Variants) > idx {
67 cg.Variants[idx & ^1] = 0
68 cg.Variants[idx|1] = 0
75 if *maxtag >= 0 && ntags > *maxtag {
77 for i, cg := range cgs {
78 if len(cg.Variants) > *maxtag*2 {
79 cgs[i].Variants = cg.Variants[:*maxtag*2]
85 mincov := int(*mincoverage * float64(len(cgs)*2))
86 cov := make([]int, ntags)
87 for _, cg := range cgs {
88 for idx, variant := range cg.Variants {
94 for tag, c := range cov {
96 for _, cg := range cgs {
97 if len(cg.Variants) > tag*2 {
98 cg.Variants[tag*2] = 0
99 cg.Variants[tag*2+1] = 0
106 log.Print("filtering done")
108 w := bufio.NewWriter(cmd.output)
109 enc := gob.NewEncoder(w)
111 err = enc.Encode(LibraryEntry{
117 log.Print("writing done")