15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 log "github.com/sirupsen/logrus"
25 func (f *filter) Flags(flags *flag.FlagSet) {
26 flags.IntVar(&f.MaxVariants, "max-variants", -1, "drop tiles with more than `N` variants")
27 flags.Float64Var(&f.MinCoverage, "min-coverage", 0, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
28 flags.IntVar(&f.MaxTag, "max-tag", -1, "drop tiles with tag ID > `N`")
31 func (f *filter) Apply(tilelib *tileLibrary) {
32 // Zero out variants at tile positions that have more than
33 // f.MaxVariants tile variants.
34 if f.MaxVariants >= 0 {
35 for tag, variants := range tilelib.variant {
36 if f.MaxTag >= 0 && tag >= f.MaxTag {
39 if len(variants) <= f.MaxVariants {
42 tilelib.variant[tag] = nil
43 for _, cg := range tilelib.compactGenomes {
52 // Zero out variants at tile positions that have less than
54 mincov := int(2*f.MinCoverage*float64(len(tilelib.compactGenomes)) + 1)
56 for tag := 0; tag < len(tilelib.variant) && tag < f.MaxTag; tag++ {
58 for _, cg := range tilelib.compactGenomes {
69 for _, cg := range tilelib.compactGenomes {
75 // Truncate genomes and tile data to f.MaxTag (TODO: truncate
78 if len(tilelib.variant) > f.MaxTag {
79 tilelib.variant = tilelib.variant[:f.MaxTag]
81 for name, cg := range tilelib.compactGenomes {
82 if len(cg) > 2*f.MaxTag {
83 tilelib.compactGenomes[name] = cg[:2*f.MaxTag]
89 type filtercmd struct {
94 func (cmd *filtercmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
98 fmt.Fprintf(stderr, "%s\n", err)
101 flags := flag.NewFlagSet("", flag.ContinueOnError)
102 flags.SetOutput(stderr)
103 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
104 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
105 projectUUID := flags.String("project", "", "project `UUID` for output data")
106 priority := flags.Int("priority", 500, "container request priority")
107 inputFilename := flags.String("i", "-", "input `file`")
108 outputFilename := flags.String("o", "-", "output `file`")
109 cmd.filter.Flags(flags)
110 err = flags.Parse(args)
111 if err == flag.ErrHelp {
114 } else if err != nil {
121 log.Println(http.ListenAndServe(*pprof, nil))
126 if *outputFilename != "-" {
127 err = errors.New("cannot specify output file in container mode: not implemented")
130 runner := arvadosContainerRunner{
131 Name: "lightning filter",
132 Client: arvados.NewClientFromEnv(),
133 ProjectUUID: *projectUUID,
138 err = runner.TranslatePaths(inputFilename)
142 runner.Args = []string{"filter", "-local=true",
143 "-i", *inputFilename,
144 "-o", "/mnt/output/library.gob",
145 "-max-variants", fmt.Sprintf("%d", cmd.MaxVariants),
146 "-min-coverage", fmt.Sprintf("%f", cmd.MinCoverage),
147 "-max-tag", fmt.Sprintf("%d", cmd.MaxTag),
150 output, err = runner.Run()
154 fmt.Fprintln(stdout, output+"/library.gob")
158 var infile io.ReadCloser
159 if *inputFilename == "-" {
160 infile = ioutil.NopCloser(stdin)
162 infile, err = os.Open(*inputFilename)
169 cgs, err := ReadCompactGenomes(infile)
177 log.Printf("reading done, %d genomes", len(cgs))
179 log.Print("filtering")
181 for _, cg := range cgs {
182 if ntags < len(cg.Variants)/2 {
183 ntags = len(cg.Variants) / 2
185 if cmd.MaxVariants < 0 {
188 for idx, variant := range cg.Variants {
189 if variant > tileVariantID(cmd.MaxVariants) {
190 for _, cg := range cgs {
191 if len(cg.Variants) > idx {
192 cg.Variants[idx & ^1] = 0
193 cg.Variants[idx|1] = 0
200 if cmd.MaxTag >= 0 && ntags > cmd.MaxTag {
202 for i, cg := range cgs {
203 if len(cg.Variants) > cmd.MaxTag*2 {
204 cgs[i].Variants = cg.Variants[:cmd.MaxTag*2]
209 if cmd.MinCoverage > 0 {
210 mincov := int(cmd.MinCoverage * float64(len(cgs)*2))
211 cov := make([]int, ntags)
212 for _, cg := range cgs {
213 for idx, variant := range cg.Variants {
219 for tag, c := range cov {
221 for _, cg := range cgs {
222 if len(cg.Variants) > tag*2 {
223 cg.Variants[tag*2] = 0
224 cg.Variants[tag*2+1] = 0
231 log.Print("filtering done")
233 var outfile io.WriteCloser
234 if *outputFilename == "-" {
235 outfile = nopCloser{cmd.output}
237 outfile, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
241 defer outfile.Close()
243 w := bufio.NewWriter(outfile)
244 enc := gob.NewEncoder(w)
246 err = enc.Encode(LibraryEntry{
252 log.Print("writing done")
257 err = outfile.Close()