Accept PDH on command line.
[lightning.git] / arvados.go
index 48cfad7ccdd6f37e3381f0bb54d1170031babc5c..4aeef48a70359ceed1b9c2bbcc5506a5aa25a069 100644 (file)
@@ -1,6 +1,11 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package lightning
 
 import (
+       "bufio"
        "context"
        "encoding/json"
        "errors"
@@ -10,7 +15,6 @@ import (
        "net/url"
        "os"
        "regexp"
-       "runtime"
        "strings"
        "sync"
        "time"
@@ -19,6 +23,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "github.com/klauspost/pgzip"
        log "github.com/sirupsen/logrus"
        "golang.org/x/crypto/blake2b"
        "golang.org/x/net/websocket"
@@ -266,6 +271,9 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e
                                Preemptible: true,
                                Partitions:  []string{},
                        },
+                       "environment": map[string]string{
+                               "GOMAXPROCS": fmt.Sprintf("%d", rc.VCPUs),
+                       },
                },
        })
        if err != nil {
@@ -280,6 +288,7 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e
        subscribedUUID := ""
        defer func() {
                if subscribedUUID != "" {
+                       log.Printf("unsubscribe container UUID: %s", subscribedUUID)
                        client.Unsubscribe(logch, subscribedUUID)
                }
        }()
@@ -303,8 +312,10 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e
                        fmt.Fprint(os.Stderr, neednewline)
                        neednewline = ""
                        if subscribedUUID != "" {
+                               log.Printf("unsubscribe container UUID: %s", subscribedUUID)
                                client.Unsubscribe(logch, subscribedUUID)
                        }
+                       log.Printf("subscribe container UUID: %s", cr.ContainerUUID)
                        client.Subscribe(logch, cr.ContainerUUID)
                        subscribedUUID = cr.ContainerUUID
                }
@@ -381,16 +392,20 @@ func (runner *arvadosContainerRunner) TranslatePaths(paths ...*string) error {
                if m == nil {
                        return fmt.Errorf("cannot find uuid in path: %q", *path)
                }
-               uuid := m[2]
-               mnt, ok := runner.Mounts["/mnt/"+uuid]
+               collID := m[2]
+               mnt, ok := runner.Mounts["/mnt/"+collID]
                if !ok {
                        mnt = map[string]interface{}{
                                "kind": "collection",
-                               "uuid": uuid,
                        }
-                       runner.Mounts["/mnt/"+uuid] = mnt
+                       if len(collID) == 27 {
+                               mnt["uuid"] = collID
+                       } else {
+                               mnt["portable_data_hash"] = collID
+                       }
+                       runner.Mounts["/mnt/"+collID] = mnt
                }
-               *path = "/mnt/" + uuid + m[3]
+               *path = "/mnt/" + collID + m[3]
        }
        return nil
 }
@@ -468,13 +483,52 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) {
        return coll.UUID, nil
 }
 
+// zopen returns a reader for the given file, using the arvados API
+// instead of arv-mount/fuse where applicable, and transparently
+// decompressing the input if fnm ends with ".gz".
+func zopen(fnm string) (io.ReadCloser, error) {
+       f, err := open(fnm)
+       if err != nil || !strings.HasSuffix(fnm, ".gz") {
+               return f, err
+       }
+       rdr, err := pgzip.NewReader(bufio.NewReaderSize(f, 4*1024*1024))
+       if err != nil {
+               f.Close()
+               return nil, err
+       }
+       return gzipr{rdr, f}, nil
+}
+
+// gzipr wraps a ReadCloser and a Closer, presenting a single Close()
+// method that closes both wrapped objects.
+type gzipr struct {
+       io.ReadCloser
+       io.Closer
+}
+
+func (gr gzipr) Close() error {
+       e1 := gr.ReadCloser.Close()
+       e2 := gr.Closer.Close()
+       if e1 != nil {
+               return e1
+       }
+       return e2
+}
+
 var (
        arvadosClientFromEnv = arvados.NewClientFromEnv()
+       keepClient           *keepclient.KeepClient
        siteFS               arvados.CustomFileSystem
        siteFSMtx            sync.Mutex
 )
 
-func open(fnm string) (io.ReadCloser, error) {
+type file interface {
+       io.ReadCloser
+       io.Seeker
+       Readdir(n int) ([]os.FileInfo, error)
+}
+
+func open(fnm string) (file, error) {
        if os.Getenv("ARVADOS_API_HOST") == "" {
                return os.Open(fnm)
        }
@@ -482,11 +536,8 @@ func open(fnm string) (io.ReadCloser, error) {
        if m == nil {
                return os.Open(fnm)
        }
-       uuid := m[2]
-       mnt := "/mnt/" + uuid + "/"
-       if !strings.HasPrefix(fnm, mnt) {
-               return os.Open(fnm)
-       }
+       collectionUUID := m[2]
+       collectionPath := m[3]
 
        siteFSMtx.Lock()
        defer siteFSMtx.Unlock()
@@ -497,15 +548,29 @@ func open(fnm string) (io.ReadCloser, error) {
                        return nil, err
                }
                ac.Client = arvados.DefaultSecureClient
-               kc := keepclient.New(ac)
+               keepClient = keepclient.New(ac)
                // Don't use keepclient's default short timeouts.
-               kc.HTTPClient = arvados.DefaultSecureClient
-               // Guess max concurrent readers, hope to avoid cache
-               // thrashing.
-               kc.BlockCache = &keepclient.BlockCache{MaxBlocks: runtime.NumCPU() * 3}
-               siteFS = arvadosClientFromEnv.SiteFileSystem(kc)
+               keepClient.HTTPClient = arvados.DefaultSecureClient
+               keepClient.BlockCache = &keepclient.BlockCache{MaxBlocks: 4}
+               siteFS = arvadosClientFromEnv.SiteFileSystem(keepClient)
+       } else {
+               keepClient.BlockCache.MaxBlocks += 2
        }
 
-       log.Infof("reading %q from %s using Arvados client", fnm[len(mnt):], uuid)
-       return siteFS.Open("by_id/" + uuid + "/" + fnm[len(mnt):])
+       log.Infof("reading %q from %s using Arvados client", collectionPath, collectionUUID)
+       f, err := siteFS.Open("by_id/" + collectionUUID + collectionPath)
+       if err != nil {
+               return nil, err
+       }
+       return &reduceCacheOnClose{file: f}, nil
+}
+
+type reduceCacheOnClose struct {
+       file
+       once sync.Once
+}
+
+func (rc *reduceCacheOnClose) Close() error {
+       rc.once.Do(func() { keepClient.BlockCache.MaxBlocks -= 2 })
+       return rc.file.Close()
 }