X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/1da1956511f6716c72631ec413a4c2cd0f561dd6..5435c35e1d163adf5a79597721016c253e7c7e1d:/arvados.go diff --git a/arvados.go b/arvados.go index be11a2333d..7eed782a2a 100644 --- a/arvados.go +++ b/arvados.go @@ -1,21 +1,30 @@ -package main +// Copyright (C) The Lightning Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package lightning import ( + "bufio" "context" "encoding/json" "errors" "fmt" + "io" "io/ioutil" "net/url" "os" "regexp" + "strconv" "strings" "sync" "time" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/keepclient" + "github.com/klauspost/pgzip" log "github.com/sirupsen/logrus" "golang.org/x/crypto/blake2b" "golang.org/x/net/websocket" @@ -177,17 +186,21 @@ reconnect: } } +var refreshTicker = time.NewTicker(5 * time.Second) + type arvadosContainerRunner struct { Client *arvados.Client Name string OutputName string ProjectUUID string + APIAccess bool VCPUs int RAM int64 Prog string // if empty, run /proc/self/exe Args []string Mounts map[string]map[string]interface{} Priority int + KeepCache int // cache buffers per VCPU (0 for default) } func (runner *arvadosContainerRunner) Run() (string, error) { @@ -227,10 +240,15 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e if priority < 1 { priority = 500 } + keepCache := runner.KeepCache + if keepCache < 1 { + keepCache = 2 + } rc := arvados.RuntimeConstraints{ + API: &runner.APIAccess, VCPUs: runner.VCPUs, RAM: runner.RAM, - KeepCacheRAM: (1 << 26) * 2 * int64(runner.VCPUs), + KeepCacheRAM: (1 << 26) * int64(keepCache) * int64(runner.VCPUs), } outname := &runner.OutputName if *outname == "" { @@ -250,6 +268,13 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e "runtime_constraints": rc, "priority": runner.Priority, "state": arvados.ContainerRequestStateCommitted, + "scheduling_parameters": arvados.SchedulingParameters{ + Preemptible: false, + Partitions: []string{}, + }, + "environment": map[string]string{ + "GOMAXPROCS": fmt.Sprintf("%d", rc.VCPUs), + }, }, }) if err != nil { @@ -264,25 +289,27 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e subscribedUUID := "" defer func() { if subscribedUUID != "" { + log.Printf("unsubscribe container UUID: %s", subscribedUUID) client.Unsubscribe(logch, subscribedUUID) } }() - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - neednewline := "" lastState := cr.State refreshCR := func() { - err = runner.Client.RequestAndDecode(&cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil) + ctx, cancel := context.WithDeadline(ctx, time.Now().Add(time.Minute)) + defer cancel() + err = runner.Client.RequestAndDecodeContext(ctx, &cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil) if err != nil { fmt.Fprint(os.Stderr, neednewline) + neednewline = "" log.Printf("error getting container request: %s", err) return } if lastState != cr.State { fmt.Fprint(os.Stderr, neednewline) + neednewline = "" log.Printf("container request state: %s", cr.State) lastState = cr.State } @@ -290,14 +317,16 @@ func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, e fmt.Fprint(os.Stderr, neednewline) neednewline = "" if subscribedUUID != "" { + log.Printf("unsubscribe container UUID: %s", subscribedUUID) client.Unsubscribe(logch, subscribedUUID) } + log.Printf("subscribe container UUID: %s", cr.ContainerUUID) client.Subscribe(logch, cr.ContainerUUID) subscribedUUID = cr.ContainerUUID } } - var reCrunchstat = regexp.MustCompile(`mem .* rss`) + var reCrunchstat = regexp.MustCompile(`mem .* (\d+) rss`) waitctr: for cr.State != arvados.ContainerRequestStateFinal { select { @@ -311,7 +340,7 @@ waitctr: log.Errorf("error while trying to cancel container request %s: %s", cr.UUID, err) } break waitctr - case <-ticker.C: + case <-refreshTicker.C: refreshCR() case msg := <-logch: switch msg.EventType { @@ -327,9 +356,10 @@ waitctr: } case "crunchstat": for _, line := range strings.Split(msg.Properties.Text, "\n") { - mem := reCrunchstat.FindString(line) - if mem != "" { - fmt.Fprintf(os.Stderr, "%s \r", mem) + m := reCrunchstat.FindStringSubmatch(line) + if m != nil { + rss, _ := strconv.ParseInt(m[1], 10, 64) + fmt.Fprintf(os.Stderr, "%s rss %.3f GB \r", cr.UUID, float64(rss)/1e9) neednewline = "\n" } } @@ -368,16 +398,20 @@ func (runner *arvadosContainerRunner) TranslatePaths(paths ...*string) error { if m == nil { return fmt.Errorf("cannot find uuid in path: %q", *path) } - uuid := m[2] - mnt, ok := runner.Mounts["/mnt/"+uuid] + collID := m[2] + mnt, ok := runner.Mounts["/mnt/"+collID] if !ok { mnt = map[string]interface{}{ "kind": "collection", - "uuid": uuid, } - runner.Mounts["/mnt/"+uuid] = mnt + if len(collID) == 27 { + mnt["uuid"] = collID + } else { + mnt["portable_data_hash"] = collID + } + runner.Mounts["/mnt/"+collID] = mnt } - *path = "/mnt/" + uuid + m[3] + *path = "/mnt/" + collID + m[3] } return nil } @@ -392,7 +426,7 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { return "", err } b2 := blake2b.Sum256(exe) - cname := fmt.Sprintf("lightning-%x", b2) + cname := "lightning " + cmd.Version.String() // must build with "make", not just "go install" var existing arvados.CollectionList err = runner.Client.RequestAndDecode(&existing, "GET", "arvados/v1/collections", nil, arvados.ListOptions{ Limit: 1, @@ -400,15 +434,16 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { Filters: []arvados.Filter{ {Attr: "name", Operator: "=", Operand: cname}, {Attr: "owner_uuid", Operator: "=", Operand: runner.ProjectUUID}, + {Attr: "properties.blake2b", Operator: "=", Operand: fmt.Sprintf("%x", b2)}, }, }) if err != nil { return "", err } if len(existing.Items) > 0 { - uuid := existing.Items[0].UUID - log.Printf("using lightning binary in existing collection %s (name is %q; did not verify whether content matches)", uuid, cname) - return uuid, nil + coll := existing.Items[0] + log.Printf("using lightning binary in existing collection %s (name is %q, hash is %q; did not verify whether content matches)", coll.UUID, cname, coll.Properties["blake2b"]) + return coll.UUID, nil } log.Printf("writing lightning binary to new collection %q", cname) ac, err := arvadosclient.New(runner.Client) @@ -442,6 +477,9 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { "owner_uuid": runner.ProjectUUID, "manifest_text": mtxt, "name": cname, + "properties": map[string]interface{}{ + "blake2b": fmt.Sprintf("%x", b2), + }, }, }) if err != nil { @@ -450,3 +488,95 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { log.Printf("stored lightning binary in new collection %s", coll.UUID) return coll.UUID, nil } + +// zopen returns a reader for the given file, using the arvados API +// instead of arv-mount/fuse where applicable, and transparently +// decompressing the input if fnm ends with ".gz". +func zopen(fnm string) (io.ReadCloser, error) { + f, err := open(fnm) + if err != nil || !strings.HasSuffix(fnm, ".gz") { + return f, err + } + rdr, err := pgzip.NewReader(bufio.NewReaderSize(f, 4*1024*1024)) + if err != nil { + f.Close() + return nil, err + } + return gzipr{rdr, f}, nil +} + +// gzipr wraps a ReadCloser and a Closer, presenting a single Close() +// method that closes both wrapped objects. +type gzipr struct { + io.ReadCloser + io.Closer +} + +func (gr gzipr) Close() error { + e1 := gr.ReadCloser.Close() + e2 := gr.Closer.Close() + if e1 != nil { + return e1 + } + return e2 +} + +var ( + arvadosClientFromEnv = arvados.NewClientFromEnv() + keepClient *keepclient.KeepClient + siteFS arvados.CustomFileSystem + siteFSMtx sync.Mutex +) + +type file interface { + io.ReadCloser + io.Seeker + Readdir(n int) ([]os.FileInfo, error) +} + +func open(fnm string) (file, error) { + if os.Getenv("ARVADOS_API_HOST") == "" { + return os.Open(fnm) + } + m := collectionInPathRe.FindStringSubmatch(fnm) + if m == nil { + return os.Open(fnm) + } + collectionUUID := m[2] + collectionPath := m[3] + + siteFSMtx.Lock() + defer siteFSMtx.Unlock() + if siteFS == nil { + log.Info("setting up Arvados client") + ac, err := arvadosclient.New(arvadosClientFromEnv) + if err != nil { + return nil, err + } + ac.Client = arvados.DefaultSecureClient + keepClient = keepclient.New(ac) + // Don't use keepclient's default short timeouts. + keepClient.HTTPClient = arvados.DefaultSecureClient + keepClient.BlockCache = &keepclient.BlockCache{MaxBlocks: 4} + siteFS = arvadosClientFromEnv.SiteFileSystem(keepClient) + } else { + keepClient.BlockCache.MaxBlocks += 2 + } + + log.Infof("reading %q from %s using Arvados client", collectionPath, collectionUUID) + f, err := siteFS.Open("by_id/" + collectionUUID + collectionPath) + if err != nil { + return nil, err + } + return &reduceCacheOnClose{file: f}, nil +} + +type reduceCacheOnClose struct { + file + once sync.Once +} + +func (rc *reduceCacheOnClose) Close() error { + rc.once.Do(func() { keepClient.BlockCache.MaxBlocks -= 2 }) + return rc.file.Close() +}