// filter holds the options that control which tiles and genomes are
// kept when a tile library is filtered. Flags registers the options
// on a flag set, Args renders them back into command-line form, and
// Apply applies them to a tile library.
type filter struct {
	// MaxVariants is the largest number of variants a tag may have
	// before Apply clears it in every genome. A negative value
	// disables this check.
	MaxVariants int

	// MinCoverage is the coverage threshold, expressed as a
	// fraction of all haplotypes, below which Apply clears a tag.
	MinCoverage float64

	// MaxTag is the number of leading tags to keep; Apply truncates
	// the variant list and every genome beyond it. A negative
	// value disables truncation.
	MaxTag int

	// MatchGenome is a regular expression; Apply deletes genomes
	// whose names it does not match.
	MatchGenome string
}
+
+func (f *filter) Flags(flags *flag.FlagSet) {
+ flags.IntVar(&f.MaxVariants, "max-variants", -1, "drop tiles with more than `N` variants")
+ flags.Float64Var(&f.MinCoverage, "min-coverage", 0, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
+ flags.IntVar(&f.MaxTag, "max-tag", -1, "drop tiles with tag ID > `N`")
+ flags.StringVar(&f.MatchGenome, "match-genome", "", "keep genomes whose names contain `regexp`, drop the rest")
+}
+
+func (f *filter) Args() []string {
+ return []string{
+ fmt.Sprintf("-max-variants=%d", f.MaxVariants),
+ fmt.Sprintf("-min-coverage=%f", f.MinCoverage),
+ fmt.Sprintf("-max-tag=%d", f.MaxTag),
+ fmt.Sprintf("-match-genome=%s", f.MatchGenome),
+ }
+}
+
+func (f *filter) Apply(tilelib *tileLibrary) {
+ // Zero out variants at tile positions that have more than
+ // f.MaxVariants tile variants.
+ if f.MaxVariants >= 0 {
+ for tag, variants := range tilelib.variant {
+ if f.MaxTag >= 0 && tag >= f.MaxTag {
+ break
+ }
+ if len(variants) <= f.MaxVariants {
+ continue
+ }
+ for _, cg := range tilelib.compactGenomes {
+ if len(cg) > tag*2 {
+ cg[tag*2] = 0
+ cg[tag*2+1] = 0
+ }
+ }
+ }
+ }
+
+ // Zero out variants at tile positions that have less than
+ // f.MinCoverage.
+ mincov := int(2*f.MinCoverage*float64(len(tilelib.compactGenomes)) + 1)
+TAG:
+ for tag := 0; tag < len(tilelib.variant) && (tag < f.MaxTag || f.MaxTag < 0); tag++ {
+ tagcov := 0
+ for _, cg := range tilelib.compactGenomes {
+ if len(cg) < tag*2+2 {
+ continue
+ }
+ if cg[tag*2] > 0 {
+ tagcov++
+ }
+ if cg[tag*2+1] > 0 {
+ tagcov++
+ }
+ if tagcov >= mincov {
+ continue TAG
+ }
+ }
+ for _, cg := range tilelib.compactGenomes {
+ if len(cg) > tag*2 {
+ cg[tag*2] = 0
+ cg[tag*2+1] = 0
+ }
+ }
+ }
+
+ // Truncate genomes and tile data to f.MaxTag (TODO: truncate
+ // refseqs too)
+ if f.MaxTag >= 0 {
+ if len(tilelib.variant) > f.MaxTag {
+ tilelib.variant = tilelib.variant[:f.MaxTag]
+ }
+ for name, cg := range tilelib.compactGenomes {
+ if len(cg) > 2*f.MaxTag {
+ tilelib.compactGenomes[name] = cg[:2*f.MaxTag]
+ }
+ }
+ }
+
+ re, err := regexp.Compile(f.MatchGenome)
+ if err != nil {
+ log.Errorf("invalid regexp %q does not match anything, dropping all genomes", f.MatchGenome)
+ }
+ for name := range tilelib.compactGenomes {
+ if !re.MatchString(name) {
+ delete(tilelib.compactGenomes, name)
+ }
+ }
+}
+
+type filtercmd struct {