18870: Commits stuff
[arvados.git] / lib / cloud / cloudtest / tester.go
index 33ff264bccdd7627fa9da65ba626bbd6a9f95dce..9fd7c9e74941f8e12c47ae6bab60f5ea764fa422 100644 (file)
@@ -11,10 +11,10 @@ import (
        "fmt"
        "time"
 
-       "git.curoverse.com/arvados.git/lib/cloud"
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/lib/cloud"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
@@ -46,9 +46,9 @@ type tester struct {
        PauseBeforeDestroy func()
 
        is              cloud.InstanceSet
-       testInstance    cloud.Instance
+       testInstance    *worker.TagVerifier
        secret          string
-       executor        *ssh_executor.Executor
+       executor        *sshexecutor.Executor
        showedLoginInfo bool
 
        failed bool
@@ -69,17 +69,18 @@ func (t *tester) Run() bool {
                return false
        }
 
-       // Don't send the driver any filters the first time we get the
-       // instance list. This way we can log an instance count
-       // (N=...)  that includes all instances in this service
-       // account, even if they don't have the same InstanceSetID.
-       insts, err := t.getInstances(nil)
-       if err != nil {
-               t.Logger.WithError(err).Info("error getting initial list of instances")
-               return false
-       }
-
        for {
+               // Don't send the driver any filters when getting the
+               // initial instance list. This way we can log an
+               // instance count (N=...)  that includes all instances
+               // in this service account, even if they don't have
+               // the same InstanceSetID.
+               insts, err := t.getInstances(nil)
+               if err != nil {
+                       t.Logger.WithError(err).Info("error getting list of instances")
+                       return false
+               }
+
                foundExisting := false
                for _, i := range insts {
                        if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] != string(t.SetID) {
@@ -92,7 +93,9 @@ func (t *tester) Run() bool {
                        foundExisting = true
                        if t.DestroyExisting {
                                lgr.Info("destroying existing instance with our InstanceSetID")
+                               t0 := time.Now()
                                err := i.Destroy()
+                               lgr := lgr.WithField("Duration", time.Since(t0))
                                if err != nil {
                                        lgr.WithError(err).Error("error destroying existing instance")
                                } else {
@@ -124,7 +127,7 @@ func (t *tester) Run() bool {
        defer t.destroyTestInstance()
 
        bootDeadline := time.Now().Add(t.TimeoutBooting)
-       initCommand := worker.TagVerifier{nil, t.secret}.InitCommand()
+       initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand()
 
        t.Logger.WithFields(logrus.Fields{
                "InstanceType":         t.InstanceType.Name,
@@ -133,29 +136,30 @@ func (t *tester) Run() bool {
                "Tags":                 tags,
                "InitCommand":          initCommand,
        }).Info("creating instance")
+       t0 := time.Now()
        inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, t.SSHKey.PublicKey())
+       lgrC := t.Logger.WithField("Duration", time.Since(t0))
        if err != nil {
                // Create() might have failed due to a bug or network
                // error even though the creation was successful, so
                // it's safer to wait a bit for an instance to appear.
                deferredError = true
-               t.Logger.WithError(err).Error("error creating test instance")
+               lgrC.WithError(err).Error("error creating test instance")
                t.Logger.WithField("Deadline", bootDeadline).Info("waiting for instance to appear anyway, in case the Create response was incorrect")
                for err = t.refreshTestInstance(); err != nil; err = t.refreshTestInstance() {
                        if time.Now().After(bootDeadline) {
                                t.Logger.Error("timed out")
                                return false
-                       } else {
-                               t.sleepSyncInterval()
                        }
+                       t.sleepSyncInterval()
                }
                t.Logger.WithField("Instance", t.testInstance.ID()).Info("new instance appeared")
                t.showLoginInfo()
        } else {
                // Create() succeeded. Make sure the new instance
                // appears right away in the Instances() list.
-               t.Logger.WithField("Instance", inst.ID()).Info("created instance")
-               t.testInstance = inst
+               lgrC.WithField("Instance", inst.ID()).Info("created instance")
+               t.testInstance = &worker.TagVerifier{Instance: inst, Secret: t.secret, ReportVerified: nil}
                t.showLoginInfo()
                err = t.refreshTestInstance()
                if err == errTestInstanceNotFound {
@@ -166,15 +170,14 @@ func (t *tester) Run() bool {
                        return false
                }
        }
-       t.testInstance = worker.TagVerifier{t.testInstance, t.secret}
 
        if !t.checkTags() {
                // checkTags() already logged the errors
-               return false
+               deferredError = true
        }
 
        if !t.waitForBoot(bootDeadline) {
-               return false
+               deferredError = true
        }
 
        if t.ShellCommand != "" {
@@ -232,7 +235,7 @@ func (t *tester) refreshTestInstance() error {
                        "Instance": i.ID(),
                        "Address":  i.Address(),
                }).Info("found our instance in returned list")
-               t.testInstance = worker.TagVerifier{i, t.secret}
+               t.testInstance = &worker.TagVerifier{Instance: i, Secret: t.secret, ReportVerified: nil}
                if !t.showedLoginInfo {
                        t.showLoginInfo()
                }
@@ -248,11 +251,15 @@ func (t *tester) refreshTestInstance() error {
 func (t *tester) getInstances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
        var ret []cloud.Instance
        t.Logger.WithField("FilterTags", tags).Info("getting instance list")
+       t0 := time.Now()
        insts, err := t.is.Instances(tags)
        if err != nil {
                return nil, err
        }
-       t.Logger.WithField("N", len(insts)).Info("got instance list")
+       t.Logger.WithFields(logrus.Fields{
+               "Duration": time.Since(t0),
+               "N":        len(insts),
+       }).Info("got instance list")
        for _, i := range insts {
                if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] == string(t.SetID) {
                        ret = append(ret, i)
@@ -300,7 +307,7 @@ func (t *tester) waitForBoot(deadline time.Time) bool {
 // current address.
 func (t *tester) updateExecutor() {
        if t.executor == nil {
-               t.executor = ssh_executor.New(t.testInstance)
+               t.executor = sshexecutor.New(t.testInstance)
                t.executor.SetTargetPort(t.SSHPort)
                t.executor.SetSigners(t.SSHKey)
        } else {
@@ -313,11 +320,13 @@ func (t *tester) runShellCommand(cmd string) error {
        t.Logger.WithFields(logrus.Fields{
                "Command": cmd,
        }).Info("executing remote command")
+       t0 := time.Now()
        stdout, stderr, err := t.executor.Execute(nil, cmd, nil)
        lgr := t.Logger.WithFields(logrus.Fields{
-               "Command": cmd,
-               "stdout":  string(stdout),
-               "stderr":  string(stderr),
+               "Duration": time.Since(t0),
+               "Command":  cmd,
+               "stdout":   string(stdout),
+               "stderr":   string(stderr),
        })
        if err != nil {
                lgr.WithError(err).Info("remote command failed")
@@ -333,20 +342,25 @@ func (t *tester) destroyTestInstance() bool {
                return true
        }
        for {
-               t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroying instance")
+               lgr := t.Logger.WithField("Instance", t.testInstance.ID())
+               lgr.Info("destroying instance")
+               t0 := time.Now()
+
                err := t.testInstance.Destroy()
+               lgrDur := lgr.WithField("Duration", time.Since(t0))
                if err != nil {
-                       t.Logger.WithError(err).WithField("Instance", t.testInstance.ID()).Error("error destroying instance")
+                       lgrDur.WithError(err).Error("error destroying instance")
                } else {
-                       t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroyed instance")
+                       lgrDur.Info("destroyed instance")
                }
+
                err = t.refreshTestInstance()
                if err == errTestInstanceNotFound {
-                       t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance no longer appears in list")
+                       lgr.Info("instance no longer appears in list")
                        t.testInstance = nil
                        return true
                } else if err == nil {
-                       t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance still exists after calling Destroy")
+                       lgr.Info("instance still exists after calling Destroy")
                        t.sleepSyncInterval()
                        continue
                } else {