16 "git.arvados.org/arvados.git/sdk/go/arvados"
19 type filterer struct {
23 func (cmd *filterer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
27 fmt.Fprintf(stderr, "%s\n", err)
30 flags := flag.NewFlagSet("", flag.ContinueOnError)
31 flags.SetOutput(stderr)
32 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
33 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
34 projectUUID := flags.String("project", "", "project `UUID` for output data")
35 inputFilename := flags.String("i", "-", "input `file`")
36 outputFilename := flags.String("o", "-", "output `file`")
37 maxvariants := flags.Int("max-variants", -1, "drop tiles with more than `N` variants")
38 mincoverage := flags.Float64("min-coverage", 1, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
39 maxtag := flags.Int("max-tag", -1, "drop tiles with tag ID > `N`")
40 err = flags.Parse(args)
41 if err == flag.ErrHelp {
44 } else if err != nil {
51 log.Println(http.ListenAndServe(*pprof, nil))
56 if *outputFilename != "-" {
57 err = errors.New("cannot specify output file in container mode: not implemented")
60 runner := arvadosContainerRunner{
61 Name: "lightning filter",
62 Client: arvados.NewClientFromEnv(),
63 ProjectUUID: *projectUUID,
67 err = runner.TranslatePaths(inputFilename)
71 runner.Args = []string{"filter", "-local=true",
73 "-o", "/mnt/output/library.gob",
74 "-max-variants", fmt.Sprintf("%d", *maxvariants),
75 "-min-coverage", fmt.Sprintf("%f", *mincoverage),
76 "-max-tag", fmt.Sprintf("%d", *maxtag),
85 var infile io.ReadCloser
86 if *inputFilename == "-" {
87 infile = ioutil.NopCloser(stdin)
89 infile, err = os.Open(*inputFilename)
96 cgs, err := ReadCompactGenomes(infile)
104 log.Printf("reading done, %d genomes", len(cgs))
106 log.Print("filtering")
108 for _, cg := range cgs {
109 if ntags < len(cg.Variants)/2 {
110 ntags = len(cg.Variants) / 2
112 if *maxvariants < 0 {
115 maxVariantID := tileVariantID(*maxvariants)
116 for idx, variant := range cg.Variants {
117 if variant > maxVariantID {
118 for _, cg := range cgs {
119 if len(cg.Variants) > idx {
120 cg.Variants[idx & ^1] = 0
121 cg.Variants[idx|1] = 0
128 if *maxtag >= 0 && ntags > *maxtag {
130 for i, cg := range cgs {
131 if len(cg.Variants) > *maxtag*2 {
132 cgs[i].Variants = cg.Variants[:*maxtag*2]
137 if *mincoverage < 1 {
138 mincov := int(*mincoverage * float64(len(cgs)*2))
139 cov := make([]int, ntags)
140 for _, cg := range cgs {
141 for idx, variant := range cg.Variants {
147 for tag, c := range cov {
149 for _, cg := range cgs {
150 if len(cg.Variants) > tag*2 {
151 cg.Variants[tag*2] = 0
152 cg.Variants[tag*2+1] = 0
159 log.Print("filtering done")
161 var outfile io.WriteCloser
162 if *outputFilename == "-" {
163 outfile = nopCloser{cmd.output}
165 outfile, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
169 defer outfile.Close()
171 w := bufio.NewWriter(outfile)
172 enc := gob.NewEncoder(w)
174 err = enc.Encode(LibraryEntry{
180 log.Print("writing done")
185 err = outfile.Close()