More memory + direct Keep access for merge and exportnumpy.
author Tom Clegg <tom@tomclegg.ca>
Fri, 4 Dec 2020 15:46:42 +0000 (10:46 -0500)
committer Tom Clegg <tom@tomclegg.ca>
Fri, 4 Dec 2020 15:46:42 +0000 (10:46 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

exportnumpy.go
merge.go

index eba0234bfe216e12384306f73b97fec04e34a673..c21bf80fbc5f52abf3fe3a099c6526b919866d48 100644 (file)
@@ -66,9 +66,11 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
                        Name:        "lightning export-numpy",
                        Client:      arvados.NewClientFromEnv(),
                        ProjectUUID: *projectUUID,
-                       RAM:         128000000000,
-                       VCPUs:       32,
+                       RAM:         240000000000,
+                       VCPUs:       16,
                        Priority:    *priority,
+                       KeepCache:   1,
+                       APIAccess:   true,
                }
                err = runner.TranslatePaths(inputFilename)
                if err != nil {
@@ -98,12 +100,13 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader,
        if *inputFilename == "-" {
                input = ioutil.NopCloser(stdin)
        } else {
-               input, err = os.Open(*inputFilename)
+               input, err = open(*inputFilename)
                if err != nil {
                        return 1
                }
                defer input.Close()
        }
+       input = ioutil.NopCloser(bufio.NewReaderSize(input, 8*1024*1024))
        tilelib := &tileLibrary{
                retainNoCalls:       true,
                retainTileSequences: true,
index 904db8e4d27f5f0b6cc2adff2c2613e850a89ee9..968923711f3353fd0a37d36626a886446244ca93 100644 (file)
--- a/merge.go
+++ b/merge.go
@@ -70,9 +70,10 @@ func (cmd *merger) RunCommand(prog string, args []string, stdin io.Reader, stdou
                        Name:        "lightning merge",
                        Client:      arvados.NewClientFromEnv(),
                        ProjectUUID: *projectUUID,
-                       RAM:         64000000000,
+                       RAM:         150000000000,
                        VCPUs:       2,
                        Priority:    *priority,
+                       APIAccess:   true,
                }
                for i := range cmd.inputs {
                        err = runner.TranslatePaths(&cmd.inputs[i])
@@ -152,22 +153,21 @@ func (cmd *merger) doMerge() error {
 
        var wg sync.WaitGroup
        for _, input := range cmd.inputs {
-               var infile io.ReadCloser
-               if input == "-" {
-                       infile = ioutil.NopCloser(cmd.stdin)
-               } else {
+               rdr := ioutil.NopCloser(cmd.stdin)
+               if input != "-" {
                        var err error
-                       infile, err = os.Open(input)
+                       rdr, err = open(input)
                        if err != nil {
                                return err
                        }
-                       defer infile.Close()
+                       defer rdr.Close()
                }
+               rdr = ioutil.NopCloser(bufio.NewReaderSize(rdr, 8*1024*1024))
                wg.Add(1)
                go func(input string) {
                        defer wg.Done()
                        log.Printf("%s: reading", input)
-                       err := cmd.tilelib.LoadGob(ctx, infile, strings.HasSuffix(input, ".gz"), nil)
+                       err := cmd.tilelib.LoadGob(ctx, rdr, strings.HasSuffix(input, ".gz"), nil)
                        if err != nil {
                                cmd.setError(fmt.Errorf("%s: load failed: %w", input, err))
                                cancel()