X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/896220cbd21811433f6db068f559677927e56757..4f220df0aeb61480e2d203857830ad5c2bbed4c0:/exportnumpy.go diff --git a/exportnumpy.go b/exportnumpy.go index 8e2eeed230..39f228a1b0 100644 --- a/exportnumpy.go +++ b/exportnumpy.go @@ -1,3 +1,7 @@ +// Copyright (C) The Lightning Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + package lightning import ( @@ -42,8 +46,8 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)") projectUUID := flags.String("project", "", "project `UUID` for output data") priority := flags.Int("priority", 500, "container request priority") - inputFilename := flags.String("i", "-", "input `file`") - outputDir := flags.String("output-dir", "/tmp", "output `directory`") + inputDir := flags.String("input-dir", "./in", "input `directory`") + outputDir := flags.String("output-dir", "./out", "output `directory`") annotationsFilename := flags.String("output-annotations", "", "output `file` for tile variant annotations csv") librefsFilename := flags.String("output-onehot2tilevar", "", "when using -one-hot, create csv `file` mapping column# to tag# and variant#") labelsFilename := flags.String("output-labels", "", "output `file` for genome labels csv") @@ -71,31 +75,29 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, Name: "lightning export-numpy", Client: arvados.NewClientFromEnv(), ProjectUUID: *projectUUID, - RAM: 750000000000, - VCPUs: 32, + RAM: 500000000000, + VCPUs: 96, Priority: *priority, KeepCache: 1, APIAccess: true, } - err = runner.TranslatePaths(inputFilename, regionsFilename) + err = runner.TranslatePaths(inputDir, regionsFilename) if err != nil { return 1 } runner.Args = []string{"export-numpy", "-local=true", - "-pprof", ":6000", + "-pprof", ":6060", fmt.Sprintf("-one-hot=%v", *onehot), - "-i", *inputFilename, + "-input-dir", *inputDir, "-output-dir", "/mnt/output", "-output-annotations", "/mnt/output/annotations.csv", "-output-onehot2tilevar", "/mnt/output/onehot2tilevar.csv", "-output-labels", "/mnt/output/labels.csv", "-regions", *regionsFilename, "-expand-regions", fmt.Sprintf("%d", *expandRegions), - "-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants), - "-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage), - "-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag), "-chunks", fmt.Sprintf("%d", *chunks), } + runner.Args = append(runner.Args, cmd.filter.Args()...) var output string output, err = runner.Run() if err != nil { @@ -105,27 +107,12 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, return 0 } - var input io.ReadCloser - if *inputFilename == "-" { - input = ioutil.NopCloser(stdin) - } else { - input, err = open(*inputFilename) - if err != nil { - return 1 - } - defer input.Close() - } - input = ioutil.NopCloser(bufio.NewReaderSize(input, 8*1024*1024)) tilelib := &tileLibrary{ retainNoCalls: true, retainTileSequences: true, compactGenomes: map[string][]tileVariantID{}, } - err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil) - if err != nil { - return 1 - } - err = input.Close() + err = tilelib.LoadDir(context.Background(), *inputDir) if err != nil { return 1 } @@ -264,19 +251,23 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, return } defer f.Close() - npw, err := gonpy.NewWriter(f) + // gonpy closes our writer and ignores errors. Give it a nopCloser so we can close f properly. + npw, err := gonpy.NewWriter(nopCloser{f}) if err != nil { lastErr.Store(err) return } npw.Shape = []int{len(names), len(pdis) * 2} - npw.WriteInt8(data) - // gonpy closes f and ignores errors, doh. - // err = f.Close() - // if err != nil { - // lastErr.Store(err) - // return - // } + err = npw.WriteInt8(data) + if err != nil { + lastErr.Store(err) + return + } + err = f.Close() + if err != nil { + lastErr.Store(err) + return + } }() } wg.Wait() @@ -372,7 +363,7 @@ func lowqual(tilelib *tileLibrary) (lowqual []map[tileVariantID]bool) { for tag, variants := range tilelib.variant { lq := lowqual[tag] for varidx, hash := range variants { - if len(tilelib.seq[hash]) == 0 { + if len(tilelib.hashSequence(hash)) == 0 { if lq == nil { lq = map[tileVariantID]bool{} lowqual[tag] = lq