"bytes"
"encoding/json"
"fmt"
+ "net"
+ "strings"
"syscall"
"time"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/lib/crunchrun"
"github.com/sirupsen/logrus"
)
type remoteRunner struct {
uuid string
executor Executor
- arvClient *arvados.Client
// configJSON is the JSON-encoded crunchrun.ConfigData built by
// newRemoteRunner; Start passes it to the runner command on stdin.
+ configJSON json.RawMessage
// runnerCmd is the command used to invoke the runner, both when
// starting (Start) and when signalling (kill).
+ runnerCmd string
// runnerArgs are extra arguments that Start single-quotes and places
// between the fixed flags and the uuid argument.
+ runnerArgs []string
// remoteUser is the value reported by the instance's RemoteUser();
// when it is not "root", commands are prefixed with "sudo".
remoteUser string
timeoutTERM time.Duration
timeoutSignal time.Duration
// newRemoteRunner returns a new remoteRunner. Caller should ensure
// Close() is called to release resources.
//
// The runner configuration (API credentials, instance type, gateway
// settings) is assembled and JSON-encoded here, so Start only has to
// send the stored bytes. Encoding failures panic because this
// constructor has no error return.
func newRemoteRunner(uuid string, wkr *worker) *remoteRunner {
+ // Send the instance type record as a JSON doc so crunch-run
+ // can log it.
+ var instJSON bytes.Buffer
+ enc := json.NewEncoder(&instJSON)
+ enc.SetIndent("", " ")
+ if err := enc.Encode(wkr.instType); err != nil {
+ panic(err)
+ }
// Environment-style settings handed to the runner via the config
// document rather than the command line.
+ var configData crunchrun.ConfigData
+ configData.Env = map[string]string{
+ "ARVADOS_API_HOST": wkr.wp.arvClient.APIHost,
+ "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
+ "InstanceType": instJSON.String(),
// NOTE(review): port "0" presumably means the runner chooses its own
// listening port on this address -- confirm against crunch-run.
+ "GatewayAddress": net.JoinHostPort(wkr.instance.Address(), "0"),
+ "GatewayAuthSecret": wkr.wp.gatewayAuthSecret(uuid),
+ }
// Mirror the API client's Insecure flag into the runner's environment.
+ if wkr.wp.arvClient.Insecure {
+ configData.Env["ARVADOS_API_HOST_INSECURE"] = "1"
+ }
// The cluster config is included only when local keep buffers are
// enabled; the buffer count scales with the instance type's VCPUs.
+ if bufs := wkr.wp.cluster.Containers.LocalKeepBlobBuffersPerVCPU; bufs > 0 {
+ configData.Cluster = wkr.wp.cluster
+ configData.KeepBuffers = bufs * wkr.instType.VCPUs
+ }
// The spot check is requested only for preemptible instance types
// under the "ec2" cloud driver.
+ if wkr.wp.cluster.Containers.CloudVMs.Driver == "ec2" && wkr.instType.Preemptible {
+ configData.EC2SpotCheck = true
+ }
+ configJSON, err := json.Marshal(configData)
+ if err != nil {
+ panic(err)
+ }
rr := &remoteRunner{
uuid: uuid,
executor: wkr.executor,
- arvClient: wkr.wp.arvClient,
+ configJSON: configJSON,
+ runnerCmd: wkr.wp.runnerCmd,
+ runnerArgs: wkr.wp.runnerArgs,
remoteUser: wkr.instance.RemoteUser(),
timeoutTERM: wkr.wp.timeoutTERM,
timeoutSignal: wkr.wp.timeoutSignal,
// assume the remote process _might_ have started, at least until it
// probes the worker and finds otherwise.
func (rr *remoteRunner) Start() {
- env := map[string]string{
- "ARVADOS_API_HOST": rr.arvClient.APIHost,
- "ARVADOS_API_TOKEN": rr.arvClient.AuthToken,
- }
- if rr.arvClient.Insecure {
- env["ARVADOS_API_HOST_INSECURE"] = "1"
- }
- envJSON, err := json.Marshal(env)
- if err != nil {
- panic(err)
// Command line layout: runnerCmd, the fixed flags, each runnerArgs
// entry, then the uuid as the final argument.
+ cmd := rr.runnerCmd + " --detach --stdin-config"
+ for _, arg := range rr.runnerArgs {
// Wrap each argument in single quotes, rewriting any embedded single
// quote as '\'' so the argument stays one word (presumably the command
// string is interpreted by a shell on the instance).
+ cmd += " '" + strings.Replace(arg, "'", "'\\''", -1) + "'"
}
- stdin := bytes.NewBuffer(envJSON)
- cmd := "crunch-run --detach --stdin-env '" + rr.uuid + "'"
// NOTE(review): uuid is wrapped in single quotes but not escaped;
// this assumes it never contains a single quote.
+ cmd += " '" + rr.uuid + "'"
// Any remote user other than root runs the command through sudo.
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}
// The JSON config prepared by newRemoteRunner is delivered on the
// command's stdin, matching the --stdin-config flag above.
+ stdin := bytes.NewBuffer(rr.configJSON)
stdout, stderr, err := rr.executor.Execute(nil, cmd, stdin)
if err != nil {
rr.logger.WithField("stdout", string(stdout)).
func (rr *remoteRunner) kill(sig syscall.Signal) {
logger := rr.logger.WithField("Signal", int(sig))
logger.Info("sending signal")
- cmd := fmt.Sprintf("crunch-run --kill %d %s", sig, rr.uuid)
// runnerCmd is passed as a %s argument rather than concatenated into
// the format string, so a '%' in the configured command is copied
// verbatim instead of being interpreted as a formatting verb.
+ cmd := fmt.Sprintf("%s --kill %d %s", rr.runnerCmd, sig, rr.uuid)
// Any remote user other than root runs the command through sudo.
if rr.remoteUser != "root" {
cmd = "sudo " + cmd
}