X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/cb4efac6793d18892dde09c631895cb98c3df470..7285b2204d2650179782981dea2af454e607fd7f:/lib/dispatchcloud/worker/runner.go diff --git a/lib/dispatchcloud/worker/runner.go b/lib/dispatchcloud/worker/runner.go index c30ff9f2b7..29c4b8e0a3 100644 --- a/lib/dispatchcloud/worker/runner.go +++ b/lib/dispatchcloud/worker/runner.go @@ -8,10 +8,12 @@ import ( "bytes" "encoding/json" "fmt" + "net" + "strings" "syscall" "time" - "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/lib/crunchrun" "github.com/sirupsen/logrus" ) @@ -20,7 +22,9 @@ import ( type remoteRunner struct { uuid string executor Executor - arvClient *arvados.Client + configJSON json.RawMessage + runnerCmd string + runnerArgs []string remoteUser string timeoutTERM time.Duration timeoutSignal time.Duration @@ -36,10 +40,39 @@ type remoteRunner struct { // newRemoteRunner returns a new remoteRunner. Caller should ensure // Close() is called to release resources. func newRemoteRunner(uuid string, wkr *worker) *remoteRunner { + // Send the instance type record as a JSON doc so crunch-run + // can log it. + var instJSON bytes.Buffer + enc := json.NewEncoder(&instJSON) + enc.SetIndent("", " ") + if err := enc.Encode(wkr.instType); err != nil { + panic(err) + } + var configData crunchrun.ConfigData + configData.Env = map[string]string{ + "ARVADOS_API_HOST": wkr.wp.arvClient.APIHost, + "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken, + "InstanceType": instJSON.String(), + "GatewayAddress": net.JoinHostPort(wkr.instance.Address(), "0"), + "GatewayAuthSecret": wkr.wp.gatewayAuthSecret(uuid), + } + if wkr.wp.arvClient.Insecure { + configData.Env["ARVADOS_API_HOST_INSECURE"] = "1" + } + if bufs := wkr.wp.cluster.Containers.LocalKeepBlobBuffersPerVCPU; bufs > 0 { + configData.Cluster = wkr.wp.cluster + configData.KeepBuffers = bufs * wkr.instType.VCPUs + } + configJSON, err := json.Marshal(configData) + if err != nil { + panic(err) + } rr := &remoteRunner{ uuid: uuid, executor: wkr.executor, - arvClient: wkr.wp.arvClient, + configJSON: configJSON, + runnerCmd: wkr.wp.runnerCmd, + runnerArgs: wkr.wp.runnerArgs, remoteUser: wkr.instance.RemoteUser(), timeoutTERM: wkr.wp.timeoutTERM, timeoutSignal: wkr.wp.timeoutSignal, @@ -57,22 +90,15 @@ func newRemoteRunner(uuid string, wkr *worker) *remoteRunner { // assume the remote process _might_ have started, at least until it // probes the worker and finds otherwise. func (rr *remoteRunner) Start() { - env := map[string]string{ - "ARVADOS_API_HOST": rr.arvClient.APIHost, - "ARVADOS_API_TOKEN": rr.arvClient.AuthToken, - } - if rr.arvClient.Insecure { - env["ARVADOS_API_HOST_INSECURE"] = "1" - } - envJSON, err := json.Marshal(env) - if err != nil { - panic(err) + cmd := rr.runnerCmd + " --detach --stdin-config" + for _, arg := range rr.runnerArgs { + cmd += " '" + strings.Replace(arg, "'", "'\\''", -1) + "'" } - stdin := bytes.NewBuffer(envJSON) - cmd := "crunch-run --detach --stdin-env '" + rr.uuid + "'" + cmd += " '" + rr.uuid + "'" if rr.remoteUser != "root" { cmd = "sudo " + cmd } + stdin := bytes.NewBuffer(rr.configJSON) stdout, stderr, err := rr.executor.Execute(nil, cmd, stdin) if err != nil { rr.logger.WithField("stdout", string(stdout)). @@ -128,7 +154,7 @@ func (rr *remoteRunner) Kill(reason string) { func (rr *remoteRunner) kill(sig syscall.Signal) { logger := rr.logger.WithField("Signal", int(sig)) logger.Info("sending signal") - cmd := fmt.Sprintf("crunch-run --kill %d %s", sig, rr.uuid) + cmd := fmt.Sprintf(rr.runnerCmd+" --kill %d %s", sig, rr.uuid) if rr.remoteUser != "root" { cmd = "sudo " + cmd }