20520: Add Containers.CloudVMs.InstanceInitCommand config.
author Tom Clegg <tom@curii.com>
Tue, 23 May 2023 21:40:06 +0000 (17:40 -0400)
committer Tom Clegg <tom@curii.com>
Tue, 23 May 2023 21:40:06 +0000 (17:40 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/cloud/cloudtest/cmd.go
lib/cloud/cloudtest/tester.go
lib/config/config.default.yml
lib/dispatchcloud/worker/pool.go
sdk/go/arvados/config.go

index 0ec79e1175dcda50d98609c55d4ff15d7e976d11..b3a262c7e49cce9b6e7e18f746377dd07a927085 100644 (file)
@@ -86,22 +86,23 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
        tagKeyPrefix := cluster.Containers.CloudVMs.TagKeyPrefix
        tags[tagKeyPrefix+"CloudTestPID"] = fmt.Sprintf("%d", os.Getpid())
        if !(&tester{
-               Logger:           logger,
-               Tags:             tags,
-               TagKeyPrefix:     tagKeyPrefix,
-               SetID:            cloud.InstanceSetID(*instanceSetID),
-               DestroyExisting:  *destroyExisting,
-               ProbeInterval:    cluster.Containers.CloudVMs.ProbeInterval.Duration(),
-               SyncInterval:     cluster.Containers.CloudVMs.SyncInterval.Duration(),
-               TimeoutBooting:   cluster.Containers.CloudVMs.TimeoutBooting.Duration(),
-               Driver:           driver,
-               DriverParameters: cluster.Containers.CloudVMs.DriverParameters,
-               ImageID:          cloud.ImageID(*imageID),
-               InstanceType:     it,
-               SSHKey:           key,
-               SSHPort:          cluster.Containers.CloudVMs.SSHPort,
-               BootProbeCommand: cluster.Containers.CloudVMs.BootProbeCommand,
-               ShellCommand:     *shellCommand,
+               Logger:              logger,
+               Tags:                tags,
+               TagKeyPrefix:        tagKeyPrefix,
+               SetID:               cloud.InstanceSetID(*instanceSetID),
+               DestroyExisting:     *destroyExisting,
+               ProbeInterval:       cluster.Containers.CloudVMs.ProbeInterval.Duration(),
+               SyncInterval:        cluster.Containers.CloudVMs.SyncInterval.Duration(),
+               TimeoutBooting:      cluster.Containers.CloudVMs.TimeoutBooting.Duration(),
+               Driver:              driver,
+               DriverParameters:    cluster.Containers.CloudVMs.DriverParameters,
+               ImageID:             cloud.ImageID(*imageID),
+               InstanceType:        it,
+               SSHKey:              key,
+               SSHPort:             cluster.Containers.CloudVMs.SSHPort,
+               BootProbeCommand:    cluster.Containers.CloudVMs.BootProbeCommand,
+               InstanceInitCommand: cloud.InitCommand(cluster.Containers.CloudVMs.InstanceInitCommand),
+               ShellCommand:        *shellCommand,
                PauseBeforeDestroy: func() {
                        if *pauseBeforeDestroy {
                                logger.Info("waiting for operator to press Enter")
index 9fd7c9e74941f8e12c47ae6bab60f5ea764fa422..41e8f658ac20f606b1cc89ea2426bb474813e13b 100644 (file)
@@ -27,23 +27,24 @@ var (
 // configuration. Run() should be called only once, after assigning
 // suitable values to public fields.
 type tester struct {
-       Logger             logrus.FieldLogger
-       Tags               cloud.SharedResourceTags
-       TagKeyPrefix       string
-       SetID              cloud.InstanceSetID
-       DestroyExisting    bool
-       ProbeInterval      time.Duration
-       SyncInterval       time.Duration
-       TimeoutBooting     time.Duration
-       Driver             cloud.Driver
-       DriverParameters   json.RawMessage
-       InstanceType       arvados.InstanceType
-       ImageID            cloud.ImageID
-       SSHKey             ssh.Signer
-       SSHPort            string
-       BootProbeCommand   string
-       ShellCommand       string
-       PauseBeforeDestroy func()
+       Logger              logrus.FieldLogger
+       Tags                cloud.SharedResourceTags
+       TagKeyPrefix        string
+       SetID               cloud.InstanceSetID
+       DestroyExisting     bool
+       ProbeInterval       time.Duration
+       SyncInterval        time.Duration
+       TimeoutBooting      time.Duration
+       Driver              cloud.Driver
+       DriverParameters    json.RawMessage
+       InstanceType        arvados.InstanceType
+       ImageID             cloud.ImageID
+       SSHKey              ssh.Signer
+       SSHPort             string
+       BootProbeCommand    string
+       InstanceInitCommand cloud.InitCommand
+       ShellCommand        string
+       PauseBeforeDestroy  func()
 
        is              cloud.InstanceSet
        testInstance    *worker.TagVerifier
@@ -127,7 +128,7 @@ func (t *tester) Run() bool {
        defer t.destroyTestInstance()
 
        bootDeadline := time.Now().Add(t.TimeoutBooting)
-       initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand()
+       initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand() + "\n" + t.InstanceInitCommand
 
        t.Logger.WithFields(logrus.Fields{
                "InstanceType":         t.InstanceType.Name,
index 8203a94de9dceae9c9d2af55358d81cc20090fb9..06f4fb55ed6455121247b2b3478433db740744ee 100644 (file)
@@ -1397,6 +1397,12 @@ Clusters:
         # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
+        # Shell script to run on new instances using the cloud
+        # provider's UserData (EC2) or CustomData (Azure) feature.
+        #
+        # It is not necessary to include a #!/bin/sh line.
+        InstanceInitCommand: ""
+
         # An executable file (located on the dispatcher host) to be
         # copied to cloud instances at runtime and used as the
         # container runner/supervisor. The default value is the
index 4bf969358d2c5b038a1a9daff17fd5451bc4703a..3de207ffa0e4eaf26bea0200e2aebc6ff7b1d7c4 100644 (file)
@@ -106,6 +106,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
                newExecutor:                    newExecutor,
                cluster:                        cluster,
                bootProbeCommand:               cluster.Containers.CloudVMs.BootProbeCommand,
+               instanceInitCommand:            cloud.InitCommand(cluster.Containers.CloudVMs.InstanceInitCommand),
                runnerSource:                   cluster.Containers.CloudVMs.DeployRunnerBinary,
                imageID:                        cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
                instanceTypes:                  cluster.InstanceTypes,
@@ -149,6 +150,7 @@ type Pool struct {
        newExecutor                    func(cloud.Instance) Executor
        cluster                        *arvados.Cluster
        bootProbeCommand               string
+       instanceInitCommand            cloud.InitCommand
        runnerSource                   string
        imageID                        cloud.ImageID
        instanceTypes                  map[string]arvados.InstanceType
@@ -347,7 +349,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
                        wp.tagKeyPrefix + tagKeyIdleBehavior:   string(IdleBehaviorRun),
                        wp.tagKeyPrefix + tagKeyInstanceSecret: secret,
                }
-               initCmd := TagVerifier{nil, secret, nil}.InitCommand()
+               initCmd := TagVerifier{nil, secret, nil}.InitCommand() + "\n" + wp.instanceInitCommand
                inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
                wp.mtx.Lock()
                defer wp.mtx.Unlock()
index ee0e805134e2f5ff54f89083610a70cb6ffbff63..1018a9f236a9dc3ac6a0196cf5a5833881f7279a 100644 (file)
@@ -557,6 +557,7 @@ type CloudVMsConfig struct {
        Enable bool
 
        BootProbeCommand               string
+       InstanceInitCommand            string
        DeployRunnerBinary             string
        ImageID                        string
        MaxCloudOpsPerSecond           int