-package main
+package lightning
import (
+ "context"
"encoding/json"
"errors"
"fmt"
+ "io"
"io/ioutil"
"net/url"
"os"
"regexp"
+ "runtime"
"strings"
"sync"
"time"
+ "git.arvados.org/arvados.git/lib/cmd"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"method": "subscribe",
"filters": [][]interface{}{
{"object_uuid", "=", uuid},
- {"event_type", "in", []string{"stderr", "crunch-run", "update"}},
+ {"event_type", "in", []string{"stderr", "crunch-run", "crunchstat", "update"}},
},
})
}
}
}
// refreshTicker fires every 5 seconds. It is one package-level ticker,
// created at package initialization and not stopped anywhere in the code
// shown here; RunContext re-fetches its container request on each tick
// it receives.
// NOTE(review): a tick is delivered to a single receiver, so when
// several RunContext calls wait at the same time each of them presumably
// polls less often than every 5 seconds -- confirm this is intended.
+var refreshTicker = time.NewTicker(5 * time.Second)
+
// arvadosContainerRunner holds the settings used to submit a container
// request (see RunContext) and to look up or store the command
// collection (see makeCommandCollection).
type arvadosContainerRunner struct {
Client *arvados.Client // API client used for the container request and collection calls
Name string
+ OutputName string // sent as "output_name"; nil is sent instead when this is empty
ProjectUUID string // required by RunContext; also the owner_uuid used for the command collection
+ APIAccess bool // passed by address as the API runtime constraint
VCPUs int // VCPUs runtime constraint; also scales KeepCacheRAM
RAM int64 // RAM runtime constraint
Prog string // if empty, run /proc/self/exe
Args []string
Mounts map[string]map[string]interface{}
Priority int // sent as the container request's "priority"
+ KeepCache int // cache buffers per VCPU (0 for default)
}
// Run calls RunContext with context.Background(), i.e. without any
// caller-controlled cancellation, and returns its results unchanged.
func (runner *arvadosContainerRunner) Run() (string, error) {
+ return runner.RunContext(context.Background())
+}
+
// RunContext submits a container request built from the runner's fields,
// then waits -- relaying the container's log events -- until the request
// reaches the Final state or ctx is done.
//
// It fails immediately when ProjectUUID is empty. If ctx is done before
// the request is final, the request's priority is set to 0 and the
// function returns an empty string together with ctx.Err().
//
// Parts of this function are not visible in this view; the comments
// below describe only the lines shown.
+func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, error) {
if runner.ProjectUUID == "" {
return "", errors.New("cannot run arvados container: ProjectUUID not provided")
}
// priority is declared in lines not shown here; values below 1 are
// replaced by 500.
if priority < 1 {
priority = 500
}
// Use 2 cache buffers per VCPU when KeepCache is zero or negative.
+ keepCache := runner.KeepCache
+ if keepCache < 1 {
+ keepCache = 2
+ }
rc := arvados.RuntimeConstraints{
+ API: &runner.APIAccess,
VCPUs: runner.VCPUs,
RAM: runner.RAM,
// 1<<26 bytes (64 MiB) multiplied by keepCache and by the VCPU count.
- KeepCacheRAM: (1 << 26) * 2 * int64(runner.VCPUs),
+ KeepCacheRAM: (1 << 26) * int64(keepCache) * int64(runner.VCPUs),
+ }
// Send output_name as nil rather than "" when no OutputName was given.
+ outname := &runner.OutputName
+ if *outname == "" {
+ outname = nil
}
var cr arvados.ContainerRequest
err := runner.Client.RequestAndDecode(&cr, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
"mounts": mounts,
"use_existing": true,
"output_path": "/mnt/output",
+ "output_name": outname,
"runtime_constraints": rc,
// NOTE(review): this sends runner.Priority, not the local priority
// that was raised to 500 above. Unless the local is used in lines not
// shown here, the default never reaches the API -- confirm.
"priority": runner.Priority,
"state": arvados.ContainerRequestStateCommitted,
// Marks the request preemptible; Partitions is sent as an empty list.
+ "scheduling_parameters": arvados.SchedulingParameters{
+ Preemptible: true,
+ Partitions: []string{},
+ },
},
})
if err != nil {
}
}()
- ticker := time.NewTicker(5 * time.Second)
- defer ticker.Stop()
// neednewline is "\n" while a crunchstat status line (which ends in
// "\r", not "\n") is showing on stderr, and "" otherwise. It is printed
// before other output so that output starts on a line of its own.
+ neednewline := ""
lastState := cr.State
refreshCR := func() {
err = runner.Client.RequestAndDecode(&cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil)
if err != nil {
// NOTE(review): neednewline is printed here and before the
// state-change message below without being reset to "", unlike the
// other call sites, so the same newline can be printed again later
// -- confirm whether that is intended.
+ fmt.Fprint(os.Stderr, neednewline)
log.Printf("error getting container request: %s", err)
return
}
if lastState != cr.State {
+ fmt.Fprint(os.Stderr, neednewline)
log.Printf("container request state: %s", cr.State)
lastState = cr.State
}
if subscribedUUID != cr.ContainerUUID {
+ fmt.Fprint(os.Stderr, neednewline)
+ neednewline = ""
if subscribedUUID != "" {
client.Unsubscribe(logch, subscribedUUID)
}
}
}
// Matches the "mem ... rss" portion of a crunchstat log line.
+ var reCrunchstat = regexp.MustCompile(`mem .* rss`)
+waitctr:
for cr.State != arvados.ContainerRequestStateFinal {
select {
- case <-ticker.C:
// The caller gave up: try to cancel the container request by setting
// its priority to 0, then stop waiting whether or not that succeeded.
+ case <-ctx.Done():
+ err := runner.Client.RequestAndDecode(&cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{
+ "container_request": map[string]interface{}{
+ "priority": 0,
+ },
+ })
+ if err != nil {
+ log.Errorf("error while trying to cancel container request %s: %s", cr.UUID, err)
+ }
+ break waitctr
// Periodic poll driven by the package-level refreshTicker.
+ case <-refreshTicker.C:
refreshCR()
case msg := <-logch:
switch msg.EventType {
case "update":
refreshCR()
- default:
// Container stderr is logged one non-empty line at a time.
+ case "stderr":
for _, line := range strings.Split(msg.Properties.Text, "\n") {
if line != "" {
+ fmt.Fprint(os.Stderr, neednewline)
+ neednewline = ""
log.Print(line)
}
}
// Only the matched memory figures are shown; the trailing "\r" lets the
// next status line overwrite this one in place.
+ case "crunchstat":
+ for _, line := range strings.Split(msg.Properties.Text, "\n") {
+ mem := reCrunchstat.FindString(line)
+ if mem != "" {
+ fmt.Fprintf(os.Stderr, "%s \r", mem)
+ neednewline = "\n"
+ }
+ }
}
}
}
// Terminate any status line that is still showing.
+ fmt.Fprint(os.Stderr, neednewline)
+
// A cancelled or expired ctx is reported to the caller instead of a
// container result.
+ if err := ctx.Err(); err != nil {
+ return "", err
+ }
var c arvados.Container
err = runner.Client.RequestAndDecode(&c, "GET", "arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
return nil
}
// mtxMakeCommandCollection is held for the whole of every
// makeCommandCollection call, so those calls run one at a time within
// this process.
+var mtxMakeCommandCollection sync.Mutex
+
// makeCommandCollection returns the UUID of a collection holding the
// currently running executable (read from /proc/self/exe).
//
// It first looks in runner.ProjectUUID for a collection whose name is
// "lightning " plus the build version and whose "blake2b" property
// equals the hex BLAKE2b-256 sum of the executable, and returns that
// collection if one exists. Otherwise it stores the executable in a new
// collection carrying the same name, owner and property. Calls are
// serialized by mtxMakeCommandCollection.
//
// Part of the function body is not visible in this view; the comments
// below describe only the lines shown.
func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) {
+ mtxMakeCommandCollection.Lock()
+ defer mtxMakeCommandCollection.Unlock()
exe, err := ioutil.ReadFile("/proc/self/exe")
if err != nil {
return "", err
}
b2 := blake2b.Sum256(exe)
// The content hash used to be part of the name; it is now matched and
// stored as the "blake2b" property instead, and the name carries the
// build version.
- cname := fmt.Sprintf("lightning-%x", b2)
+ cname := "lightning " + cmd.Version.String() // must build with "make", not just "go install"
var existing arvados.CollectionList
err = runner.Client.RequestAndDecode(&existing, "GET", "arvados/v1/collections", nil, arvados.ListOptions{
Limit: 1,
Filters: []arvados.Filter{
{Attr: "name", Operator: "=", Operand: cname},
{Attr: "owner_uuid", Operator: "=", Operand: runner.ProjectUUID},
+ {Attr: "properties.blake2b", Operator: "=", Operand: fmt.Sprintf("%x", b2)},
},
})
if err != nil {
return "", err
}
// Reuse a matching collection. Only name, owner and the recorded hash
// property were compared; the stored content itself is not re-read.
if len(existing.Items) > 0 {
- uuid := existing.Items[0].UUID
- log.Printf("using lightning binary in existing collection %s (name is %q; did not verify whether content matches)", uuid, cname)
- return uuid, nil
+ coll := existing.Items[0]
+ log.Printf("using lightning binary in existing collection %s (name is %q, hash is %q; did not verify whether content matches)", coll.UUID, cname, coll.Properties["blake2b"])
+ return coll.UUID, nil
}
log.Printf("writing lightning binary to new collection %q", cname)
ac, err := arvadosclient.New(runner.Client)
"owner_uuid": runner.ProjectUUID,
"manifest_text": mtxt,
"name": cname,
// Record the hash so that later lookups can filter on it.
+ "properties": map[string]interface{}{
+ "blake2b": fmt.Sprintf("%x", b2),
+ },
},
})
if err != nil {
log.Printf("stored lightning binary in new collection %s", coll.UUID)
return coll.UUID, nil
}
+
// Package-level state used by open.
//
// arvadosClientFromEnv is created once, at package initialization, by
// arvados.NewClientFromEnv. siteFS starts out nil and is assigned by
// open the first time it needs to read through Arvados; siteFSMtx is
// held by open while it checks, initializes and uses siteFS.
+var (
+ arvadosClientFromEnv = arvados.NewClientFromEnv()
+ siteFS arvados.CustomFileSystem
+ siteFSMtx sync.Mutex
+)
+
+// open returns a reader for the file named fnm.
+//
+// When ARVADOS_API_HOST is set and fnm both matches collectionInPathRe
+// and starts with "/mnt/<id>/" (<id> being the regexp's second
+// submatch), the file is read through a shared Arvados site filesystem
+// as "by_id/<id>/<rest of path>". That filesystem is created on first
+// use and reused by later calls; siteFSMtx is held while it is set up
+// and while the file is opened. In every other case fnm is opened on
+// the local filesystem with os.Open.
+//
+// When a local open fails, the returned reader is a nil interface value
+// (not a nil *os.File wrapped in a non-nil io.ReadCloser), so callers
+// may safely compare it with nil.
+func open(fnm string) (io.ReadCloser, error) {
+	// openLocal opens fnm on the local filesystem. It exists so that
+	// the error path returns a literal nil: returning os.Open's
+	// results directly would convert a nil *os.File into a non-nil
+	// io.ReadCloser.
+	openLocal := func() (io.ReadCloser, error) {
+		f, err := os.Open(fnm)
+		if err != nil {
+			return nil, err
+		}
+		return f, nil
+	}
+
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		return openLocal()
+	}
+	m := collectionInPathRe.FindStringSubmatch(fnm)
+	if m == nil {
+		return openLocal()
+	}
+	uuid := m[2]
+	mnt := "/mnt/" + uuid + "/"
+	if !strings.HasPrefix(fnm, mnt) {
+		return openLocal()
+	}
+
+	siteFSMtx.Lock()
+	defer siteFSMtx.Unlock()
+	if siteFS == nil {
+		log.Info("setting up Arvados client")
+		ac, err := arvadosclient.New(arvadosClientFromEnv)
+		if err != nil {
+			return nil, err
+		}
+		ac.Client = arvados.DefaultSecureClient
+		kc := keepclient.New(ac)
+		// Don't use keepclient's default short timeouts.
+		kc.HTTPClient = arvados.DefaultSecureClient
+		// Guess max concurrent readers, hope to avoid cache
+		// thrashing.
+		kc.BlockCache = &keepclient.BlockCache{MaxBlocks: runtime.NumCPU() * 3}
+		siteFS = arvadosClientFromEnv.SiteFileSystem(kc)
+	}
+
+	log.Infof("reading %q from %s using Arvados client", fnm[len(mnt):], uuid)
+	return siteFS.Open("by_id/" + uuid + "/" + fnm[len(mnt):])
+}