From: Tom Clegg
Date: Wed, 11 Aug 2021 22:10:26 +0000 (-0400)
Subject: Add -match-genome=regexp filter.
X-Git-Url: https://git.arvados.org/lightning.git/commitdiff_plain/93e063d8357b8682c0a730fc702f8b05ece6c46f

Add -match-genome=regexp filter.

refs #17939
refs #17922

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---

diff --git a/export_test.go b/export_test.go
index d87b3ae51c..a2452c6ca3 100644
--- a/export_test.go
+++ b/export_test.go
@@ -164,6 +164,7 @@ chr2 471 . GG AA . . AC=1
 		"-output-dir=" + outdir,
 		"-output-format=hgvs-numpy",
 		"-ref=testdata/ref.fasta",
+		"-match-genome=input[12]",
 	}, nil, os.Stderr, os.Stderr)
 	c.Check(exited, check.Equals, 0)
 
diff --git a/filter.go b/filter.go
index 9d43abea60..9fe150e63d 100644
--- a/filter.go
+++ b/filter.go
@@ -15,6 +15,7 @@ import (
 	"net/http"
 	_ "net/http/pprof"
 	"os"
+	"regexp"
 	"strings"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
@@ -25,12 +26,14 @@ type filter struct {
 	MaxVariants int
 	MinCoverage float64
 	MaxTag      int
+	MatchGenome string
 }
 
 func (f *filter) Flags(flags *flag.FlagSet) {
 	flags.IntVar(&f.MaxVariants, "max-variants", -1, "drop tiles with more than `N` variants")
 	flags.Float64Var(&f.MinCoverage, "min-coverage", 0, "drop tiles with coverage less than `P` across all haplotypes (0 < P ≤ 1)")
 	flags.IntVar(&f.MaxTag, "max-tag", -1, "drop tiles with tag ID > `N`")
+	flags.StringVar(&f.MatchGenome, "match-genome", "", "keep genomes whose names contain `regexp`, drop the rest")
 }
 
 func (f *filter) Args() []string {
@@ -38,6 +41,7 @@ func (f *filter) Args() []string {
 		fmt.Sprintf("-max-variants=%d", f.MaxVariants),
 		fmt.Sprintf("-min-coverage=%f", f.MinCoverage),
 		fmt.Sprintf("-max-tag=%d", f.MaxTag),
+		fmt.Sprintf("-match-genome=%s", f.MatchGenome),
 	}
 }
 
@@ -101,6 +105,19 @@ TAG:
 			}
 		}
 	}
+
+	re, err := regexp.Compile(f.MatchGenome)
+	if err != nil {
+		log.Errorf("invalid regexp %q does not match anything, dropping all genomes", f.MatchGenome)
+	}
+	for name := range tilelib.compactGenomes {
+		// regexp.Compile returns a nil *Regexp on error; check for
+		// it here so an invalid pattern drops every genome (as
+		// logged above) instead of panicking in MatchString.
+		if re == nil || !re.MatchString(name) {
+			delete(tilelib.compactGenomes, name)
+		}
+	}
 }
 
 type filtercmd struct {