X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/878124fa46adf2bd14c582e8610cff1c62ac873e..ee4f69c81672248fd775c6b2adde9f62dbfdb349:/arvados.go diff --git a/arvados.go b/arvados.go index cc41ba2c9a..48cfad7ccd 100644 --- a/arvados.go +++ b/arvados.go @@ -1,17 +1,21 @@ -package main +package lightning import ( + "context" "encoding/json" "errors" "fmt" + "io" "io/ioutil" "net/url" "os" "regexp" + "runtime" "strings" "sync" "time" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/keepclient" @@ -176,19 +180,28 @@ reconnect: } } +var refreshTicker = time.NewTicker(5 * time.Second) + type arvadosContainerRunner struct { Client *arvados.Client Name string + OutputName string ProjectUUID string + APIAccess bool VCPUs int RAM int64 Prog string // if empty, run /proc/self/exe Args []string Mounts map[string]map[string]interface{} Priority int + KeepCache int // cache buffers per VCPU (0 for default) } func (runner *arvadosContainerRunner) Run() (string, error) { + return runner.RunContext(context.Background()) +} + +func (runner *arvadosContainerRunner) RunContext(ctx context.Context) (string, error) { if runner.ProjectUUID == "" { return "", errors.New("cannot run arvados container: ProjectUUID not provided") } @@ -221,10 +234,19 @@ func (runner *arvadosContainerRunner) Run() (string, error) { if priority < 1 { priority = 500 } + keepCache := runner.KeepCache + if keepCache < 1 { + keepCache = 2 + } rc := arvados.RuntimeConstraints{ + API: &runner.APIAccess, VCPUs: runner.VCPUs, RAM: runner.RAM, - KeepCacheRAM: (1 << 26) * 2 * int64(runner.VCPUs), + KeepCacheRAM: (1 << 26) * int64(keepCache) * int64(runner.VCPUs), + } + outname := &runner.OutputName + if *outname == "" { + outname = nil } var cr arvados.ContainerRequest err := runner.Client.RequestAndDecode(&cr, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{ @@ -236,9 +258,14 @@ func (runner *arvadosContainerRunner) Run() (string, error) { "mounts": mounts, "use_existing": true, "output_path": "/mnt/output", + "output_name": outname, "runtime_constraints": rc, "priority": runner.Priority, "state": arvados.ContainerRequestStateCommitted, + "scheduling_parameters": arvados.SchedulingParameters{ + Preemptible: true, + Partitions: []string{}, + }, }, }) if err != nil { @@ -257,9 +284,6 @@ func (runner *arvadosContainerRunner) Run() (string, error) { } }() - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - neednewline := "" lastState := cr.State @@ -287,9 +311,20 @@ func (runner *arvadosContainerRunner) Run() (string, error) { } var reCrunchstat = regexp.MustCompile(`mem .* rss`) +waitctr: for cr.State != arvados.ContainerRequestStateFinal { select { - case <-ticker.C: + case <-ctx.Done(): + err := runner.Client.RequestAndDecode(&cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{ + "container_request": map[string]interface{}{ + "priority": 0, + }, + }) + if err != nil { + log.Errorf("error while trying to cancel container request %s: %s", cr.UUID, err) + } + break waitctr + case <-refreshTicker.C: refreshCR() case msg := <-logch: switch msg.EventType { @@ -316,6 +351,10 @@ func (runner *arvadosContainerRunner) Run() (string, error) { } fmt.Fprint(os.Stderr, neednewline) + if err := ctx.Err(); err != nil { + return "", err + } + var c arvados.Container err = runner.Client.RequestAndDecode(&c, "GET", "arvados/v1/containers/"+cr.ContainerUUID, nil, nil) if err != nil { @@ -356,13 +395,17 @@ func (runner *arvadosContainerRunner) TranslatePaths(paths ...*string) error { return nil } +var mtxMakeCommandCollection sync.Mutex + func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { + mtxMakeCommandCollection.Lock() + defer mtxMakeCommandCollection.Unlock() exe, err := ioutil.ReadFile("/proc/self/exe") if err != nil { return "", err } b2 := blake2b.Sum256(exe) - cname := fmt.Sprintf("lightning-%x", b2) + cname := "lightning " + cmd.Version.String() // must build with "make", not just "go install" var existing arvados.CollectionList err = runner.Client.RequestAndDecode(&existing, "GET", "arvados/v1/collections", nil, arvados.ListOptions{ Limit: 1, @@ -370,15 +413,16 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { Filters: []arvados.Filter{ {Attr: "name", Operator: "=", Operand: cname}, {Attr: "owner_uuid", Operator: "=", Operand: runner.ProjectUUID}, + {Attr: "properties.blake2b", Operator: "=", Operand: fmt.Sprintf("%x", b2)}, }, }) if err != nil { return "", err } if len(existing.Items) > 0 { - uuid := existing.Items[0].UUID - log.Printf("using lightning binary in existing collection %s (name is %q; did not verify whether content matches)", uuid, cname) - return uuid, nil + coll := existing.Items[0] + log.Printf("using lightning binary in existing collection %s (name is %q, hash is %q; did not verify whether content matches)", coll.UUID, cname, coll.Properties["blake2b"]) + return coll.UUID, nil } log.Printf("writing lightning binary to new collection %q", cname) ac, err := arvadosclient.New(runner.Client) @@ -412,6 +456,9 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { "owner_uuid": runner.ProjectUUID, "manifest_text": mtxt, "name": cname, + "properties": map[string]interface{}{ + "blake2b": fmt.Sprintf("%x", b2), + }, }, }) if err != nil { @@ -420,3 +467,45 @@ func (runner *arvadosContainerRunner) makeCommandCollection() (string, error) { log.Printf("stored lightning binary in new collection %s", coll.UUID) return coll.UUID, nil } + +var ( + arvadosClientFromEnv = arvados.NewClientFromEnv() + siteFS arvados.CustomFileSystem + siteFSMtx sync.Mutex +) + +func open(fnm string) (io.ReadCloser, error) { + if os.Getenv("ARVADOS_API_HOST") == "" { + return os.Open(fnm) + } + m := collectionInPathRe.FindStringSubmatch(fnm) + if m == nil { + return os.Open(fnm) + } + uuid := m[2] + mnt := "/mnt/" + uuid + "/" + if !strings.HasPrefix(fnm, mnt) { + return os.Open(fnm) + } + + siteFSMtx.Lock() + defer siteFSMtx.Unlock() + if siteFS == nil { + log.Info("setting up Arvados client") + ac, err := arvadosclient.New(arvadosClientFromEnv) + if err != nil { + return nil, err + } + ac.Client = arvados.DefaultSecureClient + kc := keepclient.New(ac) + // Don't use keepclient's default short timeouts. + kc.HTTPClient = arvados.DefaultSecureClient + // Guess max concurrent readers, hope to avoid cache + // thrashing. + kc.BlockCache = &keepclient.BlockCache{MaxBlocks: runtime.NumCPU() * 3} + siteFS = arvadosClientFromEnv.SiteFileSystem(kc) + } + + log.Infof("reading %q from %s using Arvados client", fnm[len(mnt):], uuid) + return siteFS.Open("by_id/" + uuid + "/" + fnm[len(mnt):]) +}