X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0684f84ec50069b24b4564474db67d6b44266699..f608029e1aec903bc35a4748ef51e6f076dae0aa:/lib/cloud/cloudtest/tester.go diff --git a/lib/cloud/cloudtest/tester.go b/lib/cloud/cloudtest/tester.go index 33ff264bcc..9fd7c9e749 100644 --- a/lib/cloud/cloudtest/tester.go +++ b/lib/cloud/cloudtest/tester.go @@ -11,10 +11,10 @@ import ( "fmt" "time" - "git.curoverse.com/arvados.git/lib/cloud" - "git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor" - "git.curoverse.com/arvados.git/lib/dispatchcloud/worker" - "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/lib/cloud" + "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor" + "git.arvados.org/arvados.git/lib/dispatchcloud/worker" + "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/sirupsen/logrus" "golang.org/x/crypto/ssh" ) @@ -46,9 +46,9 @@ type tester struct { PauseBeforeDestroy func() is cloud.InstanceSet - testInstance cloud.Instance + testInstance *worker.TagVerifier secret string - executor *ssh_executor.Executor + executor *sshexecutor.Executor showedLoginInfo bool failed bool @@ -69,17 +69,18 @@ func (t *tester) Run() bool { return false } - // Don't send the driver any filters the first time we get the - // instance list. This way we can log an instance count - // (N=...) that includes all instances in this service - // account, even if they don't have the same InstanceSetID. - insts, err := t.getInstances(nil) - if err != nil { - t.Logger.WithError(err).Info("error getting initial list of instances") - return false - } - for { + // Don't send the driver any filters when getting the + // initial instance list. This way we can log an + // instance count (N=...) that includes all instances + // in this service account, even if they don't have + // the same InstanceSetID. + insts, err := t.getInstances(nil) + if err != nil { + t.Logger.WithError(err).Info("error getting list of instances") + return false + } + foundExisting := false for _, i := range insts { if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] != string(t.SetID) { @@ -92,7 +93,9 @@ func (t *tester) Run() bool { foundExisting = true if t.DestroyExisting { lgr.Info("destroying existing instance with our InstanceSetID") + t0 := time.Now() err := i.Destroy() + lgr := lgr.WithField("Duration", time.Since(t0)) if err != nil { lgr.WithError(err).Error("error destroying existing instance") } else { @@ -124,7 +127,7 @@ func (t *tester) Run() bool { defer t.destroyTestInstance() bootDeadline := time.Now().Add(t.TimeoutBooting) - initCommand := worker.TagVerifier{nil, t.secret}.InitCommand() + initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand() t.Logger.WithFields(logrus.Fields{ "InstanceType": t.InstanceType.Name, @@ -133,29 +136,30 @@ func (t *tester) Run() bool { "Tags": tags, "InitCommand": initCommand, }).Info("creating instance") + t0 := time.Now() inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, t.SSHKey.PublicKey()) + lgrC := t.Logger.WithField("Duration", time.Since(t0)) if err != nil { // Create() might have failed due to a bug or network // error even though the creation was successful, so // it's safer to wait a bit for an instance to appear. deferredError = true - t.Logger.WithError(err).Error("error creating test instance") + lgrC.WithError(err).Error("error creating test instance") t.Logger.WithField("Deadline", bootDeadline).Info("waiting for instance to appear anyway, in case the Create response was incorrect") for err = t.refreshTestInstance(); err != nil; err = t.refreshTestInstance() { if time.Now().After(bootDeadline) { t.Logger.Error("timed out") return false - } else { - t.sleepSyncInterval() } + t.sleepSyncInterval() } t.Logger.WithField("Instance", t.testInstance.ID()).Info("new instance appeared") t.showLoginInfo() } else { // Create() succeeded. Make sure the new instance // appears right away in the Instances() list. - t.Logger.WithField("Instance", inst.ID()).Info("created instance") - t.testInstance = inst + lgrC.WithField("Instance", inst.ID()).Info("created instance") + t.testInstance = &worker.TagVerifier{Instance: inst, Secret: t.secret, ReportVerified: nil} t.showLoginInfo() err = t.refreshTestInstance() if err == errTestInstanceNotFound { @@ -166,15 +170,14 @@ func (t *tester) Run() bool { return false } } - t.testInstance = worker.TagVerifier{t.testInstance, t.secret} if !t.checkTags() { // checkTags() already logged the errors - return false + deferredError = true } if !t.waitForBoot(bootDeadline) { - return false + deferredError = true } if t.ShellCommand != "" { @@ -232,7 +235,7 @@ func (t *tester) refreshTestInstance() error { "Instance": i.ID(), "Address": i.Address(), }).Info("found our instance in returned list") - t.testInstance = worker.TagVerifier{i, t.secret} + t.testInstance = &worker.TagVerifier{Instance: i, Secret: t.secret, ReportVerified: nil} if !t.showedLoginInfo { t.showLoginInfo() } @@ -248,11 +251,15 @@ func (t *tester) refreshTestInstance() error { func (t *tester) getInstances(tags cloud.InstanceTags) ([]cloud.Instance, error) { var ret []cloud.Instance t.Logger.WithField("FilterTags", tags).Info("getting instance list") + t0 := time.Now() insts, err := t.is.Instances(tags) if err != nil { return nil, err } - t.Logger.WithField("N", len(insts)).Info("got instance list") + t.Logger.WithFields(logrus.Fields{ + "Duration": time.Since(t0), + "N": len(insts), + }).Info("got instance list") for _, i := range insts { if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] == string(t.SetID) { ret = append(ret, i) @@ -300,7 +307,7 @@ func (t *tester) waitForBoot(deadline time.Time) bool { // current address. func (t *tester) updateExecutor() { if t.executor == nil { - t.executor = ssh_executor.New(t.testInstance) + t.executor = sshexecutor.New(t.testInstance) t.executor.SetTargetPort(t.SSHPort) t.executor.SetSigners(t.SSHKey) } else { @@ -313,11 +320,13 @@ func (t *tester) runShellCommand(cmd string) error { t.Logger.WithFields(logrus.Fields{ "Command": cmd, }).Info("executing remote command") + t0 := time.Now() stdout, stderr, err := t.executor.Execute(nil, cmd, nil) lgr := t.Logger.WithFields(logrus.Fields{ - "Command": cmd, - "stdout": string(stdout), - "stderr": string(stderr), + "Duration": time.Since(t0), + "Command": cmd, + "stdout": string(stdout), + "stderr": string(stderr), }) if err != nil { lgr.WithError(err).Info("remote command failed") @@ -333,20 +342,25 @@ func (t *tester) destroyTestInstance() bool { return true } for { - t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroying instance") + lgr := t.Logger.WithField("Instance", t.testInstance.ID()) + lgr.Info("destroying instance") + t0 := time.Now() + err := t.testInstance.Destroy() + lgrDur := lgr.WithField("Duration", time.Since(t0)) if err != nil { - t.Logger.WithError(err).WithField("Instance", t.testInstance.ID()).Error("error destroying instance") + lgrDur.WithError(err).Error("error destroying instance") } else { - t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroyed instance") + lgrDur.Info("destroyed instance") } + err = t.refreshTestInstance() if err == errTestInstanceNotFound { - t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance no longer appears in list") + lgr.Info("instance no longer appears in list") t.testInstance = nil return true } else if err == nil { - t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance still exists after calling Destroy") + lgr.Info("instance still exists after calling Destroy") t.sleepSyncInterval() continue } else {