Use callbacks in struct instead of args to Load*().
[lightning.git] / exportnumpy.go
index 8e2eeed230ab64285b287b180e7535e8609b0acc..39f228a1b0cecc3823128a15183e2b0bdcba62d1 100644 (file)
@@ -1,3 +1,7 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package lightning
 
 import (
@@ -42,8 +46,8 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
        runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
        projectUUID := flags.String("project", "", "project `UUID` for output data")
        priority := flags.Int("priority", 500, "container request priority")
-       inputFilename := flags.String("i", "-", "input `file`")
-       outputDir := flags.String("output-dir", "/tmp", "output `directory`")
+       inputDir := flags.String("input-dir", "./in", "input `directory`")
+       outputDir := flags.String("output-dir", "./out", "output `directory`")
        annotationsFilename := flags.String("output-annotations", "", "output `file` for tile variant annotations csv")
        librefsFilename := flags.String("output-onehot2tilevar", "", "when using -one-hot, create csv `file` mapping column# to tag# and variant#")
        labelsFilename := flags.String("output-labels", "", "output `file` for genome labels csv")
@@ -71,31 +75,29 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
                        Name:        "lightning export-numpy",
                        Client:      arvados.NewClientFromEnv(),
                        ProjectUUID: *projectUUID,
-                       RAM:         750000000000,
-                       VCPUs:       32,
+                       RAM:         500000000000,
+                       VCPUs:       96,
                        Priority:    *priority,
                        KeepCache:   1,
                        APIAccess:   true,
                }
-               err = runner.TranslatePaths(inputFilename, regionsFilename)
+               err = runner.TranslatePaths(inputDir, regionsFilename)
                if err != nil {
                        return 1
                }
                runner.Args = []string{"export-numpy", "-local=true",
-                       "-pprof", ":6000",
+                       "-pprof", ":6060",
                        fmt.Sprintf("-one-hot=%v", *onehot),
-                       "-i", *inputFilename,
+                       "-input-dir", *inputDir,
                        "-output-dir", "/mnt/output",
                        "-output-annotations", "/mnt/output/annotations.csv",
                        "-output-onehot2tilevar", "/mnt/output/onehot2tilevar.csv",
                        "-output-labels", "/mnt/output/labels.csv",
                        "-regions", *regionsFilename,
                        "-expand-regions", fmt.Sprintf("%d", *expandRegions),
-                       "-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants),
-                       "-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage),
-                       "-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag),
                        "-chunks", fmt.Sprintf("%d", *chunks),
                }
+               runner.Args = append(runner.Args, cmd.filter.Args()...)
                var output string
                output, err = runner.Run()
                if err != nil {
@@ -105,27 +107,12 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
                return 0
        }
 
-       var input io.ReadCloser
-       if *inputFilename == "-" {
-               input = ioutil.NopCloser(stdin)
-       } else {
-               input, err = open(*inputFilename)
-               if err != nil {
-                       return 1
-               }
-               defer input.Close()
-       }
-       input = ioutil.NopCloser(bufio.NewReaderSize(input, 8*1024*1024))
        tilelib := &tileLibrary{
                retainNoCalls:       true,
                retainTileSequences: true,
                compactGenomes:      map[string][]tileVariantID{},
        }
-       err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil)
-       if err != nil {
-               return 1
-       }
-       err = input.Close()
+       err = tilelib.LoadDir(context.Background(), *inputDir)
        if err != nil {
                return 1
        }
@@ -264,19 +251,23 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
                                return
                        }
                        defer f.Close()
-                       npw, err := gonpy.NewWriter(f)
+                       // gonpy closes the writer it is given and ignores the Close error. Wrap f in a nopCloser so we can close f ourselves and check the error.
+                       npw, err := gonpy.NewWriter(nopCloser{f})
                        if err != nil {
                                lastErr.Store(err)
                                return
                        }
                        npw.Shape = []int{len(names), len(pdis) * 2}
-                       npw.WriteInt8(data)
-                       // gonpy closes f and ignores errors, doh.
-                       // err = f.Close()
-                       // if err != nil {
-                       //      lastErr.Store(err)
-                       //      return
-                       // }
+                       err = npw.WriteInt8(data)
+                       if err != nil {
+                               lastErr.Store(err)
+                               return
+                       }
+                       err = f.Close()
+                       if err != nil {
+                               lastErr.Store(err)
+                               return
+                       }
                }()
        }
        wg.Wait()
@@ -372,7 +363,7 @@ func lowqual(tilelib *tileLibrary) (lowqual []map[tileVariantID]bool) {
        for tag, variants := range tilelib.variant {
                lq := lowqual[tag]
                for varidx, hash := range variants {
-                       if len(tilelib.seq[hash]) == 0 {
+                       if len(tilelib.hashSequence(hash)) == 0 {
                                if lq == nil {
                                        lq = map[tileVariantID]bool{}
                                        lowqual[tag] = lq