Merge branch '21666-provision-test-improvement'
[arvados.git] / lib / cloud / cloudtest / tester.go
index ad3f70f123d6d4147fc634ef6e121481d3d6cea6..a335278ed6b15a91794bc8927697dec3a6aef1ec 100644 (file)
@@ -11,10 +11,10 @@ import (
        "fmt"
        "time"
 
-       "git.curoverse.com/arvados.git/lib/cloud"
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/lib/cloud"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
@@ -27,43 +27,89 @@ var (
 // configuration. Run() should be called only once, after assigning
 // suitable values to public fields.
 type tester struct {
-       Logger             logrus.FieldLogger
-       Tags               cloud.SharedResourceTags
-       TagKeyPrefix       string
-       SetID              cloud.InstanceSetID
-       DestroyExisting    bool
-       ProbeInterval      time.Duration
-       SyncInterval       time.Duration
-       TimeoutBooting     time.Duration
-       Driver             cloud.Driver
-       DriverParameters   json.RawMessage
-       InstanceType       arvados.InstanceType
-       ImageID            cloud.ImageID
-       SSHKey             ssh.Signer
-       SSHPort            string
-       BootProbeCommand   string
-       ShellCommand       string
-       PauseBeforeDestroy func()
+       Logger              logrus.FieldLogger
+       Tags                cloud.SharedResourceTags
+       TagKeyPrefix        string
+       SetID               cloud.InstanceSetID
+       DestroyExisting     bool
+       ProbeInterval       time.Duration
+       SyncInterval        time.Duration
+       TimeoutBooting      time.Duration // added to the current time to compute the boot deadline
+       Driver              cloud.Driver
+       DriverParameters    json.RawMessage // JSON object; Run() tests each entry of a multi-element "SubnetID" list separately
+       InstanceType        arvados.InstanceType
+       ImageID             cloud.ImageID
+       SSHKey              ssh.Signer // signer handed to the SSH executor; also the source of the public key to deploy
+       SSHPort             string     // target port handed to the SSH executor
+       DeployPublicKey     bool       // if false, a nil public key is passed when creating the test instance
+       BootProbeCommand    string
+       InstanceInitCommand cloud.InitCommand // appended, after a newline, to the TagVerifier's init command
+       ShellCommand        string
+       PauseBeforeDestroy  func()
 
        is              cloud.InstanceSet
        testInstance    *worker.TagVerifier
        secret          string
-       executor        *ssh_executor.Executor
+       executor        *sshexecutor.Executor
        showedLoginInfo bool
 
        failed bool
 }
 
+// Run executes the test suite once per applicable variant of
+// DriverParameters, and reports whether every run succeeded.
+//
+// At present the only variation is SubnetID: when it is configured
+// as a list with two or more entries, each entry is tested in turn
+// with SubnetID replaced by that single value.
+func (t *tester) Run() bool {
+       var params map[string]interface{}
+       if len(t.DriverParameters) > 0 {
+               if err := json.Unmarshal(t.DriverParameters, &params); err != nil {
+                       t.Logger.WithError(err).Error("error decoding configured CloudVMs.DriverParameters")
+                       return false
+               }
+       }
+
+       // Anything other than a multi-entry list (missing key, plain
+       // string, empty or single-entry list) needs only one run,
+       // using the configured parameters untouched.
+       subnetList, isList := params["SubnetID"].([]interface{})
+       if !isList || len(subnetList) < 2 {
+               return t.runWithDriverParameters(t.DriverParameters)
+       }
+
+       // Keep going after a failure so every subnet gets tested, but
+       // remember that at least one of them went wrong.
+       allPassed := true
+       for idx, entry := range subnetList {
+               subnetID, isString := entry.(string)
+               if !isString {
+                       t.Logger.Errorf("CloudVMs.DriverParameters.SubnetID[%d] is invalid -- must be a string", idx)
+                       allPassed = false
+                       continue
+               }
+               params["SubnetID"] = subnetID
+               t.Logger.Infof("running tests using SubnetID[%d] %q", idx, subnetID)
+               encoded, err := json.Marshal(params)
+               if err != nil {
+                       t.Logger.WithError(err).Error("error encoding driver parameters")
+                       allPassed = false
+                       continue
+               }
+               if !t.runWithDriverParameters(encoded) {
+                       t.Logger.Infof("failed tests using SubnetID[%d] %q", idx, subnetID)
+                       allPassed = false
+               }
+       }
+       return allPassed
+}
+
 // Run the test suite as specified, clean up as needed, and return
 // true (everything is OK) or false (something went wrong).
-func (t *tester) Run() bool {
+func (t *tester) runWithDriverParameters(driverParameters json.RawMessage) bool {
        // This flag gets set when we encounter a non-fatal error, so
        // we can continue doing more tests but remember to return
        // false (failure) at the end.
        deferredError := false
 
        var err error
-       t.is, err = t.Driver.InstanceSet(t.DriverParameters, t.SetID, t.Tags, t.Logger)
+       t.is, err = t.Driver.InstanceSet(driverParameters, t.SetID, t.Tags, t.Logger, nil)
        if err != nil {
                t.Logger.WithError(err).Info("error initializing driver")
                return false
@@ -127,7 +173,12 @@ func (t *tester) Run() bool {
        defer t.destroyTestInstance()
 
        bootDeadline := time.Now().Add(t.TimeoutBooting)
-       initCommand := worker.TagVerifier{nil, t.secret}.InitCommand()
+       initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand() + "\n" + t.InstanceInitCommand
+
+       installPublicKey := t.SSHKey.PublicKey()
+       if !t.DeployPublicKey {
+               installPublicKey = nil
+       }
 
        t.Logger.WithFields(logrus.Fields{
                "InstanceType":         t.InstanceType.Name,
@@ -135,9 +186,10 @@ func (t *tester) Run() bool {
                "ImageID":              t.ImageID,
                "Tags":                 tags,
                "InitCommand":          initCommand,
+               "DeployPublicKey":      installPublicKey != nil,
        }).Info("creating instance")
        t0 := time.Now()
-       inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, t.SSHKey.PublicKey())
+       inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, installPublicKey)
        lgrC := t.Logger.WithField("Duration", time.Since(t0))
        if err != nil {
                // Create() might have failed due to a bug or network
@@ -150,9 +202,8 @@ func (t *tester) Run() bool {
                        if time.Now().After(bootDeadline) {
                                t.Logger.Error("timed out")
                                return false
-                       } else {
-                               t.sleepSyncInterval()
                        }
+                       t.sleepSyncInterval()
                }
                t.Logger.WithField("Instance", t.testInstance.ID()).Info("new instance appeared")
                t.showLoginInfo()
@@ -160,7 +211,7 @@ func (t *tester) Run() bool {
                // Create() succeeded. Make sure the new instance
                // appears right away in the Instances() list.
                lgrC.WithField("Instance", inst.ID()).Info("created instance")
-               t.testInstance = &worker.TagVerifier{inst, t.secret}
+               t.testInstance = &worker.TagVerifier{Instance: inst, Secret: t.secret, ReportVerified: nil}
                t.showLoginInfo()
                err = t.refreshTestInstance()
                if err == errTestInstanceNotFound {
@@ -236,7 +287,7 @@ func (t *tester) refreshTestInstance() error {
                        "Instance": i.ID(),
                        "Address":  i.Address(),
                }).Info("found our instance in returned list")
-               t.testInstance = &worker.TagVerifier{i, t.secret}
+               t.testInstance = &worker.TagVerifier{Instance: i, Secret: t.secret, ReportVerified: nil}
                if !t.showedLoginInfo {
                        t.showLoginInfo()
                }
@@ -308,7 +359,7 @@ func (t *tester) waitForBoot(deadline time.Time) bool {
 // current address.
 func (t *tester) updateExecutor() {
        if t.executor == nil {
-               t.executor = ssh_executor.New(t.testInstance)
+               t.executor = sshexecutor.New(t.testInstance)
                t.executor.SetTargetPort(t.SSHPort)
                t.executor.SetSigners(t.SSHKey)
        } else {