15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 log "github.com/sirupsen/logrus"
19 type filterer struct {
23 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
27 fmt.Fprintf(stderr, "%s\n", err)
30 flags := flag.NewFlagSet("", flag.ContinueOnError)
31 flags.SetOutput(stderr)
32 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
33 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
34 projectUUID := flags.String("project", "", "project `UUID` for output data")
35 inputFilename := flags.String("i", "-", "input `file`")
36 outputFilename := flags.String("o", "-", "output `file`")
37 maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
38 mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
39 maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
40 err = flags.Parse(args)
41 if err == flag.ErrHelp {
44 } else if err != nil {
51 log.Println(http.ListenAndServe(*pprof, nil))
56 if *outputFilename != "-" {
57 err = errors.New("cannot specify output file in container mode: not implemented")
60 runner := arvadosContainerRunner{
61 Name: "lightning filter",
62 Client: arvados.NewClientFromEnv(),
63 ProjectUUID: *projectUUID,
67 err = runner.TranslatePaths(inputFilename)
71 runner.Args = []string{"filter", "-local=true",
73 "-o", "/mnt/output/library.gob",
74 "-max-variants", fmt.Sprintf("%d", *maxvariants),
75 "-min-coverage", fmt.Sprintf("%f", *mincoverage),
76 "-max-tag", fmt.Sprintf("%d", *maxtag),
79 output, err = runner.Run()
83 fmt.Fprintln(stdout, output+"/library.gob")
87 var infile io.ReadCloser
88 if *inputFilename == "-" {
89 infile = ioutil.NopCloser(stdin)
91 infile, err = os.Open(*inputFilename)
98 cgs, err := ReadCompactGenomes(infile)
106 log.Printf("reading done, %d genomes", len(cgs))
108 log.Print("filtering")
110 for _, cg := range cgs {
111 if ntags < len(cg.Variants)/2 {
112 ntags = len(cg.Variants) / 2
114 if *maxvariants < 0 {
117 maxVariantID := tileVariantID(*maxvariants)
118 for idx, variant := range cg.Variants {
119 if variant > maxVariantID {
120 for _, cg := range cgs {
121 if len(cg.Variants) > idx {
122 cg.Variants[idx & ^1] = 0
123 cg.Variants[idx|1] = 0
130 if *maxtag >= 0 && ntags > *maxtag {
132 for i, cg := range cgs {
133 if len(cg.Variants) > *maxtag*2 {
134 cgs[i].Variants = cg.Variants[:*maxtag*2]
139 if *mincoverage < 1 {
140 mincov := int(*mincoverage * float64(len(cgs)*2))
141 cov := make([]int, ntags)
142 for _, cg := range cgs {
143 for idx, variant := range cg.Variants {
149 for tag, c := range cov {
151 for _, cg := range cgs {
152 if len(cg.Variants) > tag*2 {
153 cg.Variants[tag*2] = 0
154 cg.Variants[tag*2+1] = 0
161 log.Print("filtering done")
163 var outfile io.WriteCloser
164 if *outputFilename == "-" {
165 outfile = nopCloser{cmd.output}
167 outfile, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
171 defer outfile.Close()
173 w := bufio.NewWriter(outfile)
174 enc := gob.NewEncoder(w)
176 err = enc.Encode(LibraryEntry{
182 log.Print("writing done")
187 err = outfile.Close()