+++ /dev/null
-package main
-
-import (
- "bufio"
- "encoding/gob"
- "flag"
- "fmt"
- "io"
- "log"
- "net/http"
- _ "net/http/pprof"
-)
-
-type filterer struct {
- output io.Writer
-}
-
-func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
- var err error
- defer func() {
- if err != nil {
- fmt.Fprintf(stderr, "%s\n", err)
- }
- }()
- flags := flag.NewFlagSet("", flag.ContinueOnError)
- flags.SetOutput(stderr)
- pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
- maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
- mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
- maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
- err = flags.Parse(args)
- if err == flag.ErrHelp {
- err = nil
- return 0
- } else if err != nil {
- return 2
- }
- cmd.output = stdout
-
- if *pprof != "" {
- go func() {
- log.Println(http.ListenAndServe(*pprof, nil))
- }()
- }
-
- log.Print("reading")
- cgs, err := ReadCompactGenomes(stdin)
- if err != nil {
- return 1
- }
- log.Printf("reading done, %d genomes", len(cgs))
-
- log.Print("filtering")
- ntags := 0
- for _, cg := range cgs {
- if ntags < len(cg.Variants)/2 {
- ntags = len(cg.Variants) / 2
- }
- for idx, variant := range cg.Variants {
- if int(variant) > *maxvariants {
- for _, cg := range cgs {
- if len(cg.Variants) > idx {
- cg.Variants[idx & ^1] = 0
- cg.Variants[idx|1] = 0
- }
- }
- }
- }
- }
-
- if *maxtag >= 0 && ntags > *maxtag {
- ntags = *maxtag
- for i, cg := range cgs {
- if len(cg.Variants) > *maxtag*2 {
- cgs[i].Variants = cg.Variants[:*maxtag*2]
- }
- }
- }
-
- if *mincoverage < 1 {
- mincov := int(*mincoverage * float64(len(cgs)*2))
- cov := make([]int, ntags)
- for _, cg := range cgs {
- for idx, variant := range cg.Variants {
- if variant > 0 {
- cov[idx>>1]++
- }
- }
- }
- for tag, c := range cov {
- if c < mincov {
- for _, cg := range cgs {
- if len(cg.Variants) > tag*2 {
- cg.Variants[tag*2] = 0
- cg.Variants[tag*2+1] = 0
- }
- }
- }
- }
- }
-
- log.Print("filtering done")
-
- w := bufio.NewWriter(cmd.output)
- enc := gob.NewEncoder(w)
- log.Print("writing")
- err = enc.Encode(LibraryEntry{
- CompactGenomes: cgs,
- })
- if err != nil {
- return 1
- }
- log.Print("writing done")
- err = w.Flush()
- if err != nil {
- return 1
- }
- return 0
-}