From 4b1fa532170fb94ea03a93102e346677e4dc819f Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 10 Aug 2021 15:26:37 -0400 Subject: [PATCH] Add filter options to export cmd. refs #17562 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- export.go | 6 ++++++ export_test.go | 1 + exportnumpy.go | 4 +--- filter.go | 11 ++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/export.go b/export.go index 6bc1007b23..9546926c01 100644 --- a/export.go +++ b/export.go @@ -60,6 +60,7 @@ type exporter struct { outputPerChrom bool compress bool maxTileSize int + filter filter } func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { @@ -85,6 +86,7 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std flags.BoolVar(&cmd.compress, "z", false, "write gzip-compressed output files") labelsFilename := flags.String("output-labels", "", "also output genome labels csv `file`") flags.IntVar(&cmd.maxTileSize, "max-tile-size", 50000, "don't try to make annotations for tiles bigger than given `size`") + cmd.filter.Flags(flags) err = flags.Parse(args) if err == flag.ErrHelp { err = nil @@ -151,6 +153,7 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std "-output-dir", "/mnt/output", "-z=" + fmt.Sprintf("%v", cmd.compress), } + runner.Args = append(runner.Args, cmd.filter.Args()...) var output string output, err = runner.Run() if err != nil { @@ -182,6 +185,9 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std return 1 } + log.Infof("filtering: %+v", cmd.filter) + cmd.filter.Apply(tilelib) + names := cgnames(tilelib) for _, name := range names { cgs = append(cgs, CompactGenome{Name: name, Variants: tilelib.compactGenomes[name]}) diff --git a/export_test.go b/export_test.go index dd60b52140..2cc0fbf366 100644 --- a/export_test.go +++ b/export_test.go @@ -132,6 +132,7 @@ chr2 469 . GTGG G . . AC=1 chr2 471 . GG AA . . 
AC=1 `)) + c.Logf("export hgvs-numpy") outdir := c.MkDir() exited = (&exporter{}).RunCommand("export", []string{ "-local=true", diff --git a/exportnumpy.go b/exportnumpy.go index 00165b3db7..c8e9f7cfc5 100644 --- a/exportnumpy.go +++ b/exportnumpy.go @@ -95,11 +95,9 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, "-output-labels", "/mnt/output/labels.csv", "-regions", *regionsFilename, "-expand-regions", fmt.Sprintf("%d", *expandRegions), - "-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants), - "-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage), - "-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag), "-chunks", fmt.Sprintf("%d", *chunks), } + runner.Args = append(runner.Args, cmd.filter.Args()...) var output string output, err = runner.Run() if err != nil { diff --git a/filter.go b/filter.go index c66ea0ad0f..9d43abea60 100644 --- a/filter.go +++ b/filter.go @@ -65,9 +65,12 @@ func (f *filter) Apply(tilelib *tileLibrary) { // f.MinCoverage. mincov := int(2*f.MinCoverage*float64(len(tilelib.compactGenomes)) + 1) TAG: - for tag := 0; tag < len(tilelib.variant) && tag < f.MaxTag; tag++ { + for tag := 0; tag < len(tilelib.variant) && (tag < f.MaxTag || f.MaxTag < 0); tag++ { tagcov := 0 for _, cg := range tilelib.compactGenomes { + if len(cg) < tag*2+2 { + continue + } if cg[tag*2] > 0 { tagcov++ } @@ -79,8 +82,10 @@ TAG: } } for _, cg := range tilelib.compactGenomes { - cg[tag*2] = 0 - cg[tag*2+1] = 0 + if len(cg) >= tag*2+2 { + cg[tag*2] = 0 + cg[tag*2+1] = 0 + } } } -- 2.30.2