X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0684f84ec50069b24b4564474db67d6b44266699..1efba8f3b728a3b8aa3c64c5aa09f441318ff2a8:/lib/cloud/cloudtest/tester.go

diff --git a/lib/cloud/cloudtest/tester.go b/lib/cloud/cloudtest/tester.go
index 33ff264bcc..a335278ed6 100644
--- a/lib/cloud/cloudtest/tester.go
+++ b/lib/cloud/cloudtest/tester.go
@@ -11,10 +11,10 @@ import (
 	"fmt"
 	"time"

-	"git.curoverse.com/arvados.git/lib/cloud"
-	"git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
-	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
-	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/lib/cloud"
+	"git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor"
+	"git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/crypto/ssh"
 )
@@ -27,59 +27,106 @@ var (
 // configuration. Run() should be called only once, after assigning
 // suitable values to public fields.
 type tester struct {
-	Logger              logrus.FieldLogger
-	Tags                cloud.SharedResourceTags
-	TagKeyPrefix        string
-	SetID               cloud.InstanceSetID
-	DestroyExisting     bool
-	ProbeInterval       time.Duration
-	SyncInterval        time.Duration
-	TimeoutBooting      time.Duration
-	Driver              cloud.Driver
-	DriverParameters    json.RawMessage
-	InstanceType        arvados.InstanceType
-	ImageID             cloud.ImageID
-	SSHKey              ssh.Signer
-	SSHPort             string
-	BootProbeCommand    string
-	ShellCommand        string
-	PauseBeforeDestroy  func()
+	Logger              logrus.FieldLogger
+	Tags                cloud.SharedResourceTags
+	TagKeyPrefix        string
+	SetID               cloud.InstanceSetID
+	DestroyExisting     bool
+	ProbeInterval       time.Duration
+	SyncInterval        time.Duration
+	TimeoutBooting      time.Duration
+	Driver              cloud.Driver
+	DriverParameters    json.RawMessage
+	InstanceType        arvados.InstanceType
+	ImageID             cloud.ImageID
+	SSHKey              ssh.Signer
+	SSHPort             string
+	DeployPublicKey     bool
+	BootProbeCommand    string
+	InstanceInitCommand cloud.InitCommand
+	ShellCommand        string
+	PauseBeforeDestroy  func()

 	is                  cloud.InstanceSet
-	testInstance        cloud.Instance
+	testInstance        *worker.TagVerifier
 	secret              string
-	executor            *ssh_executor.Executor
+	executor            *sshexecutor.Executor
 	showedLoginInfo     bool
 	failed              bool
 }

+// Run the test suite once for each applicable permutation of
+// DriverParameters. Return true if everything worked.
+//
+// Currently this means run once for each configured SubnetID.
+func (t *tester) Run() bool {
+	var dp map[string]interface{}
+	if len(t.DriverParameters) > 0 {
+		err := json.Unmarshal(t.DriverParameters, &dp)
+		if err != nil {
+			t.Logger.WithError(err).Error("error decoding configured CloudVMs.DriverParameters")
+			return false
+		}
+	}
+	subnets, ok := dp["SubnetID"].([]interface{})
+	if !ok || len(subnets) <= 1 {
+		// Easy, only one SubnetID to test.
+		return t.runWithDriverParameters(t.DriverParameters)
+	}
+
+	deferredError := false
+	for i, subnet := range subnets {
+		subnet, ok := subnet.(string)
+		if !ok {
+			t.Logger.Errorf("CloudVMs.DriverParameters.SubnetID[%d] is invalid -- must be a string", i)
+			deferredError = true
+			continue
+		}
+		dp["SubnetID"] = subnet
+		t.Logger.Infof("running tests using SubnetID[%d] %q", i, subnet)
+		dpjson, err := json.Marshal(dp)
+		if err != nil {
+			t.Logger.WithError(err).Error("error encoding driver parameters")
+			deferredError = true
+			continue
+		}
+		ok = t.runWithDriverParameters(dpjson)
+		if !ok {
+			t.Logger.Infof("failed tests using SubnetID[%d] %q", i, subnet)
+			deferredError = true
+		}
+	}
+	return !deferredError
+}
+
 // Run the test suite as specified, clean up as needed, and return
 // true (everything is OK) or false (something went wrong).
-func (t *tester) Run() bool {
+func (t *tester) runWithDriverParameters(driverParameters json.RawMessage) bool {
 	// This flag gets set when we encounter a non-fatal error, so
 	// we can continue doing more tests but remember to return
 	// false (failure) at the end.
 	deferredError := false

 	var err error
-	t.is, err = t.Driver.InstanceSet(t.DriverParameters, t.SetID, t.Tags, t.Logger)
+	t.is, err = t.Driver.InstanceSet(driverParameters, t.SetID, t.Tags, t.Logger, nil)
 	if err != nil {
 		t.Logger.WithError(err).Info("error initializing driver")
 		return false
 	}

-	// Don't send the driver any filters the first time we get the
-	// instance list. This way we can log an instance count
-	// (N=...) that includes all instances in this service
-	// account, even if they don't have the same InstanceSetID.
-	insts, err := t.getInstances(nil)
-	if err != nil {
-		t.Logger.WithError(err).Info("error getting initial list of instances")
-		return false
-	}
-
 	for {
+		// Don't send the driver any filters when getting the
+		// initial instance list. This way we can log an
+		// instance count (N=...) that includes all instances
+		// in this service account, even if they don't have
+		// the same InstanceSetID.
+		insts, err := t.getInstances(nil)
+		if err != nil {
+			t.Logger.WithError(err).Info("error getting list of instances")
+			return false
+		}
+
 		foundExisting := false
 		for _, i := range insts {
 			if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] != string(t.SetID) {
@@ -92,7 +139,9 @@ func (t *tester) Run() bool {
 			foundExisting = true
 			if t.DestroyExisting {
 				lgr.Info("destroying existing instance with our InstanceSetID")
+				t0 := time.Now()
 				err := i.Destroy()
+				lgr := lgr.WithField("Duration", time.Since(t0))
 				if err != nil {
 					lgr.WithError(err).Error("error destroying existing instance")
 				} else {
@@ -124,7 +173,12 @@ func (t *tester) Run() bool {
 	defer t.destroyTestInstance()

 	bootDeadline := time.Now().Add(t.TimeoutBooting)
-	initCommand := worker.TagVerifier{nil, t.secret}.InitCommand()
+	initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand() + "\n" + t.InstanceInitCommand
+
+	installPublicKey := t.SSHKey.PublicKey()
+	if !t.DeployPublicKey {
+		installPublicKey = nil
+	}

 	t.Logger.WithFields(logrus.Fields{
 		"InstanceType":    t.InstanceType.Name,
@@ -132,30 +186,32 @@ func (t *tester) Run() bool {
 		"ImageID":         t.ImageID,
 		"Tags":            tags,
 		"InitCommand":     initCommand,
+		"DeployPublicKey": installPublicKey != nil,
 	}).Info("creating instance")
-	inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, t.SSHKey.PublicKey())
+	t0 := time.Now()
+	inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, installPublicKey)
+	lgrC := t.Logger.WithField("Duration", time.Since(t0))
 	if err != nil {
 		// Create() might have failed due to a bug or network
 		// error even though the creation was successful, so
 		// it's safer to wait a bit for an instance to appear.
 		deferredError = true
-		t.Logger.WithError(err).Error("error creating test instance")
+		lgrC.WithError(err).Error("error creating test instance")
 		t.Logger.WithField("Deadline", bootDeadline).Info("waiting for instance to appear anyway, in case the Create response was incorrect")
 		for err = t.refreshTestInstance(); err != nil; err = t.refreshTestInstance() {
 			if time.Now().After(bootDeadline) {
 				t.Logger.Error("timed out")
 				return false
-			} else {
-				t.sleepSyncInterval()
 			}
+			t.sleepSyncInterval()
 		}
 		t.Logger.WithField("Instance", t.testInstance.ID()).Info("new instance appeared")
 		t.showLoginInfo()
 	} else {
 		// Create() succeeded. Make sure the new instance
 		// appears right away in the Instances() list.
-		t.Logger.WithField("Instance", inst.ID()).Info("created instance")
-		t.testInstance = inst
+		lgrC.WithField("Instance", inst.ID()).Info("created instance")
+		t.testInstance = &worker.TagVerifier{Instance: inst, Secret: t.secret, ReportVerified: nil}
 		t.showLoginInfo()
 		err = t.refreshTestInstance()
 		if err == errTestInstanceNotFound {
@@ -166,15 +222,14 @@ func (t *tester) Run() bool {
 			return false
 		}
 	}
-	t.testInstance = worker.TagVerifier{t.testInstance, t.secret}

 	if !t.checkTags() {
 		// checkTags() already logged the errors
-		return false
+		deferredError = true
 	}

 	if !t.waitForBoot(bootDeadline) {
-		return false
+		deferredError = true
 	}

 	if t.ShellCommand != "" {
@@ -232,7 +287,7 @@ func (t *tester) refreshTestInstance() error {
 			"Instance": i.ID(),
 			"Address":  i.Address(),
 		}).Info("found our instance in returned list")
-		t.testInstance = worker.TagVerifier{i, t.secret}
+		t.testInstance = &worker.TagVerifier{Instance: i, Secret: t.secret, ReportVerified: nil}
 		if !t.showedLoginInfo {
 			t.showLoginInfo()
 		}
@@ -248,11 +303,15 @@ func (t *tester) refreshTestInstance() error {
 func (t *tester) getInstances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
 	var ret []cloud.Instance
 	t.Logger.WithField("FilterTags", tags).Info("getting instance list")
+	t0 := time.Now()
 	insts, err := t.is.Instances(tags)
 	if err != nil {
 		return nil, err
 	}
-	t.Logger.WithField("N", len(insts)).Info("got instance list")
+	t.Logger.WithFields(logrus.Fields{
+		"Duration": time.Since(t0),
+		"N":        len(insts),
+	}).Info("got instance list")
 	for _, i := range insts {
 		if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] == string(t.SetID) {
 			ret = append(ret, i)
@@ -300,7 +359,7 @@ func (t *tester) waitForBoot(deadline time.Time) bool {
 // current address.
 func (t *tester) updateExecutor() {
 	if t.executor == nil {
-		t.executor = ssh_executor.New(t.testInstance)
+		t.executor = sshexecutor.New(t.testInstance)
 		t.executor.SetTargetPort(t.SSHPort)
 		t.executor.SetSigners(t.SSHKey)
 	} else {
@@ -313,11 +372,13 @@ func (t *tester) runShellCommand(cmd string) error {
 	t.Logger.WithFields(logrus.Fields{
 		"Command": cmd,
 	}).Info("executing remote command")
+	t0 := time.Now()
 	stdout, stderr, err := t.executor.Execute(nil, cmd, nil)
 	lgr := t.Logger.WithFields(logrus.Fields{
-		"Command": cmd,
-		"stdout":  string(stdout),
-		"stderr":  string(stderr),
+		"Duration": time.Since(t0),
+		"Command":  cmd,
+		"stdout":   string(stdout),
+		"stderr":   string(stderr),
 	})
 	if err != nil {
 		lgr.WithError(err).Info("remote command failed")
@@ -333,20 +394,25 @@ func (t *tester) destroyTestInstance() bool {
 		return true
 	}
 	for {
-		t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroying instance")
+		lgr := t.Logger.WithField("Instance", t.testInstance.ID())
+		lgr.Info("destroying instance")
+
+		t0 := time.Now()
 		err := t.testInstance.Destroy()
+		lgrDur := lgr.WithField("Duration", time.Since(t0))
 		if err != nil {
-			t.Logger.WithError(err).WithField("Instance", t.testInstance.ID()).Error("error destroying instance")
+			lgrDur.WithError(err).Error("error destroying instance")
 		} else {
-			t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroyed instance")
+			lgrDur.Info("destroyed instance")
 		}
+
 		err = t.refreshTestInstance()
 		if err == errTestInstanceNotFound {
-			t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance no longer appears in list")
+			lgr.Info("instance no longer appears in list")
 			t.testInstance = nil
 			return true
 		} else if err == nil {
-			t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance still exists after calling Destroy")
+			lgr.Info("instance still exists after calling Destroy")
lgr.Info("instance still exists after calling Destroy") t.sleepSyncInterval() continue } else {