Add filter options to export cmd.
author Tom Clegg <tom@tomclegg.ca>
Tue, 10 Aug 2021 19:26:37 +0000 (15:26 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Tue, 10 Aug 2021 19:26:37 +0000 (15:26 -0400)
refs #17562

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

export.go
export_test.go
exportnumpy.go
filter.go

index 6bc1007b235cadffdcdda86ef7d5d0c65bc90468..9546926c015fe98d02357b17fb4f27e54e2a1951 100644 (file)
--- a/export.go
+++ b/export.go
@@ -60,6 +60,7 @@ type exporter struct {
        outputPerChrom bool
        compress       bool
        maxTileSize    int
+       filter         filter
 }
 
 func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
@@ -85,6 +86,7 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std
        flags.BoolVar(&cmd.compress, "z", false, "write gzip-compressed output files")
        labelsFilename := flags.String("output-labels", "", "also output genome labels csv `file`")
        flags.IntVar(&cmd.maxTileSize, "max-tile-size", 50000, "don't try to make annotations for tiles bigger than given `size`")
+       cmd.filter.Flags(flags)
        err = flags.Parse(args)
        if err == flag.ErrHelp {
                err = nil
@@ -151,6 +153,7 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std
                        "-output-dir", "/mnt/output",
                        "-z=" + fmt.Sprintf("%v", cmd.compress),
                }
+               runner.Args = append(runner.Args, cmd.filter.Args()...)
                var output string
                output, err = runner.Run()
                if err != nil {
@@ -182,6 +185,9 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std
                return 1
        }
 
+       log.Infof("filtering: %+v", cmd.filter)
+       cmd.filter.Apply(tilelib)
+
        names := cgnames(tilelib)
        for _, name := range names {
                cgs = append(cgs, CompactGenome{Name: name, Variants: tilelib.compactGenomes[name]})
index dd60b52140a8509a049eae2f7d10c033f4cae31d..2cc0fbf36616f86a53bb653f602c7bd219b34ae5 100644 (file)
--- a/export_test.go
+++ b/export_test.go
@@ -132,6 +132,7 @@ chr2        469     .       GTGG    G       .       .       AC=1
 chr2   471     .       GG      AA      .       .       AC=1
 `))
 
+       c.Logf("export hgvs-numpy")
        outdir := c.MkDir()
        exited = (&exporter{}).RunCommand("export", []string{
                "-local=true",
index 00165b3db73fabab03f5dcc883e8af20c7973b57..c8e9f7cfc59a1743eeec83a56e0228303e6a49ac 100644 (file)
--- a/exportnumpy.go
+++ b/exportnumpy.go
@@ -95,11 +95,9 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
                        "-output-labels", "/mnt/output/labels.csv",
                        "-regions", *regionsFilename,
                        "-expand-regions", fmt.Sprintf("%d", *expandRegions),
-                       "-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants),
-                       "-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage),
-                       "-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag),
                        "-chunks", fmt.Sprintf("%d", *chunks),
                }
+               runner.Args = append(runner.Args, cmd.filter.Args()...)
                var output string
                output, err = runner.Run()
                if err != nil {
index c66ea0ad0f77b3542128a88fa98bdf6e22d94df8..9d43abea608d98d9be8aec53934f2383c86fb0b1 100644 (file)
--- a/filter.go
+++ b/filter.go
@@ -65,9 +65,12 @@ func (f *filter) Apply(tilelib *tileLibrary) {
        // f.MinCoverage.
        mincov := int(2*f.MinCoverage*float64(len(tilelib.compactGenomes)) + 1)
 TAG:
-       for tag := 0; tag < len(tilelib.variant) && tag < f.MaxTag; tag++ {
+       for tag := 0; tag < len(tilelib.variant) && (tag < f.MaxTag || f.MaxTag < 0); tag++ {
                tagcov := 0
                for _, cg := range tilelib.compactGenomes {
+                       if len(cg) < tag*2+2 {
+                               continue
+                       }
                        if cg[tag*2] > 0 {
                                tagcov++
                        }
@@ -79,8 +82,10 @@ TAG:
                        }
                }
                for _, cg := range tilelib.compactGenomes {
-                       cg[tag*2] = 0
-                       cg[tag*2+1] = 0
+                       if len(cg) > tag*2 {
+                               cg[tag*2] = 0
+                               cg[tag*2+1] = 0
+                       }
                }
        }