"bytes"
"encoding/json"
"fmt"
+ "net"
+ "strings"
"syscall"
"time"
+ "git.arvados.org/arvados.git/lib/crunchrun"
"github.com/sirupsen/logrus"
)
type remoteRunner struct {
uuid string
executor Executor
- envJSON json.RawMessage
+ configJSON json.RawMessage
+ runnerCmd string
+ runnerArgs []string
remoteUser string
timeoutTERM time.Duration
timeoutSignal time.Duration
// newRemoteRunner returns a new remoteRunner. Caller should ensure
// Close() is called to release resources.
func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
- // Early (<1.5) versions of crunch-run error out if they see
- // non-string values in the env map -- so here we send the
- // instance type record as a JSON doc. Once worker images are
- // updated, we can skip the extra encoding, and just include
- // {"InstanceType": wkr.instType} in the env map.
+ // Send the instance type record as a JSON doc so crunch-run
+ // can log it.
var instJSON bytes.Buffer
enc := json.NewEncoder(&instJSON)
enc.SetIndent("", " ")
if err := enc.Encode(wkr.instType); err != nil {
panic(err)
}
- env := map[string]string{
+ var configData crunchrun.ConfigData
+ configData.Env = map[string]string{
"ARVADOS_API_HOST": wkr.wp.arvClient.APIHost,
"ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
"InstanceType": instJSON.String(),
+ "GatewayAddress": net.JoinHostPort(wkr.instance.Address(), "0"),
+ "GatewayAuthSecret": wkr.wp.gatewayAuthSecret(uuid),
}
if wkr.wp.arvClient.Insecure {
- env["ARVADOS_API_HOST_INSECURE"] = "1"
+ configData.Env["ARVADOS_API_HOST_INSECURE"] = "1"
}
- envJSON, err := json.Marshal(env)
+ if bufs := wkr.wp.cluster.Containers.LocalKeepBlobBuffersPerVCPU; bufs > 0 {
+ configData.Cluster = wkr.wp.cluster
+ configData.KeepBuffers = bufs * wkr.instType.VCPUs
+ }
+ if wkr.wp.cluster.Containers.CloudVMs.Driver == "ec2" && wkr.instType.Preemptible {
+ configData.EC2SpotCheck = true
+ }
+ configJSON, err := json.Marshal(configData)
if err != nil {
panic(err)
}
rr := &remoteRunner{
uuid: uuid,
executor: wkr.executor,
- envJSON: envJSON,
+ configJSON: configJSON,
+ runnerCmd: wkr.wp.runnerCmd,
+ runnerArgs: wkr.wp.runnerArgs,
remoteUser: wkr.instance.RemoteUser(),
timeoutTERM: wkr.wp.timeoutTERM,
timeoutSignal: wkr.wp.timeoutSignal,
// assume the remote process _might_ have started, at least until it
// probes the worker and finds otherwise.
func (rr *remoteRunner) Start() {
- cmd := "crunch-run --detach --stdin-env '" + rr.uuid + "'"
+ cmd := rr.runnerCmd + " --detach --stdin-config"
+ for _, arg := range rr.runnerArgs {
+ cmd += " '" + strings.Replace(arg, "'", "'\\''", -1) + "'"
+ }
+ cmd += " '" + rr.uuid + "'"
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}
- stdin := bytes.NewBuffer(rr.envJSON)
+ stdin := bytes.NewBuffer(rr.configJSON)
stdout, stderr, err := rr.executor.Execute(nil, cmd, stdin)
if err != nil {
rr.logger.WithField("stdout", string(stdout)).
func (rr *remoteRunner) kill(sig syscall.Signal) {
logger := rr.logger.WithField("Signal", int(sig))
logger.Info("sending signal")
- cmd := fmt.Sprintf("crunch-run --kill %d %s", sig, rr.uuid)
+ cmd := fmt.Sprintf(rr.runnerCmd+" --kill %d %s", sig, rr.uuid)
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}