"fmt"
"time"
- "git.curoverse.com/arvados.git/lib/cloud"
- "git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
- "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/lib/cloud"
+ "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor"
+ "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
)
// configuration. Run() should be called only once, after assigning
// suitable values to public fields.
type tester struct {
- Logger logrus.FieldLogger
- Tags cloud.SharedResourceTags
- TagKeyPrefix string
- SetID cloud.InstanceSetID
- DestroyExisting bool
- ProbeInterval time.Duration
- SyncInterval time.Duration
- TimeoutBooting time.Duration
- Driver cloud.Driver
- DriverParameters json.RawMessage
- InstanceType arvados.InstanceType
- ImageID cloud.ImageID
- SSHKey ssh.Signer
- SSHPort string
- BootProbeCommand string
- ShellCommand string
- PauseBeforeDestroy func()
+ Logger logrus.FieldLogger
+ Tags cloud.SharedResourceTags
+ TagKeyPrefix string
+ SetID cloud.InstanceSetID
+ // DestroyExisting, when true, destroys any pre-existing instances
+ // tagged with our InstanceSetID instead of leaving them alone.
+ DestroyExisting bool
+ ProbeInterval time.Duration
+ SyncInterval time.Duration
+ // TimeoutBooting bounds how long we wait for a created (or
+ // possibly-created) instance to appear and boot.
+ TimeoutBooting time.Duration
+ Driver cloud.Driver
+ // DriverParameters is the raw JSON configuration passed to the
+ // driver; may contain a "SubnetID" key (string or list of strings).
+ DriverParameters json.RawMessage
+ InstanceType arvados.InstanceType
+ ImageID cloud.ImageID
+ SSHKey ssh.Signer
+ SSHPort string
+ BootProbeCommand string
+ // InstanceInitCommand is appended (after the TagVerifier init
+ // command) to the boot/init script of the test instance.
+ InstanceInitCommand cloud.InitCommand
+ ShellCommand string
+ PauseBeforeDestroy func()
is cloud.InstanceSet
testInstance *worker.TagVerifier
secret string
- executor *ssh_executor.Executor
+ executor *sshexecutor.Executor
showedLoginInfo bool
failed bool
}
+// Run the test suite once for each applicable permutation of
+// DriverParameters. Return true if everything worked.
+//
+// Currently this means run once for each configured SubnetID.
+func (t *tester) Run() bool {
+ var dp map[string]interface{}
+ if len(t.DriverParameters) > 0 {
+ err := json.Unmarshal(t.DriverParameters, &dp)
+ if err != nil {
+ t.Logger.WithError(err).Error("error decoding configured CloudVMs.DriverParameters")
+ return false
+ }
+ }
+ subnets, ok := dp["SubnetID"].([]interface{})
+ if !ok || len(subnets) <= 1 {
+ // Easy, only one SubnetID to test.
+ return t.runWithDriverParameters(t.DriverParameters)
+ }
+
+ // Multiple subnets: run the full suite once per subnet, remembering
+ // any failure but continuing so every subnet gets tested.
+ deferredError := false
+ for i, subnet := range subnets {
+ // Shadow the loop variable with its string value; non-strings
+ // are reported and skipped.
+ subnet, ok := subnet.(string)
+ if !ok {
+ t.Logger.Errorf("CloudVMs.DriverParameters.SubnetID[%d] is invalid -- must be a string", i)
+ deferredError = true
+ continue
+ }
+ // Replace the SubnetID list with this single subnet and
+ // re-encode the parameters for this run.
+ dp["SubnetID"] = subnet
+ t.Logger.Infof("running tests using SubnetID[%d] %q", i, subnet)
+ dpjson, err := json.Marshal(dp)
+ if err != nil {
+ t.Logger.WithError(err).Error("error encoding driver parameters")
+ deferredError = true
+ continue
+ }
+ ok = t.runWithDriverParameters(dpjson)
+ if !ok {
+ t.Logger.Infof("failed tests using SubnetID[%d] %q", i, subnet)
+ deferredError = true
+ }
+ }
+ return !deferredError
+}
+
// Run the test suite as specified, clean up as needed, and return
// true (everything is OK) or false (something went wrong).
-func (t *tester) Run() bool {
+func (t *tester) runWithDriverParameters(driverParameters json.RawMessage) bool {
// This flag gets set when we encounter a non-fatal error, so
// we can continue doing more tests but remember to return
// false (failure) at the end.
deferredError := false
var err error
- t.is, err = t.Driver.InstanceSet(t.DriverParameters, t.SetID, t.Tags, t.Logger)
+ t.is, err = t.Driver.InstanceSet(driverParameters, t.SetID, t.Tags, t.Logger, nil)
if err != nil {
t.Logger.WithError(err).Info("error initializing driver")
return false
}
- // Don't send the driver any filters the first time we get the
- // instance list. This way we can log an instance count
- // (N=...) that includes all instances in this service
- // account, even if they don't have the same InstanceSetID.
- insts, err := t.getInstances(nil)
- if err != nil {
- t.Logger.WithError(err).Info("error getting initial list of instances")
- return false
- }
-
for {
+ // Don't send the driver any filters when getting the
+ // initial instance list. This way we can log an
+ // instance count (N=...) that includes all instances
+ // in this service account, even if they don't have
+ // the same InstanceSetID.
+ insts, err := t.getInstances(nil)
+ if err != nil {
+ t.Logger.WithError(err).Info("error getting list of instances")
+ return false
+ }
+
foundExisting := false
for _, i := range insts {
if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] != string(t.SetID) {
foundExisting = true
if t.DestroyExisting {
lgr.Info("destroying existing instance with our InstanceSetID")
+ t0 := time.Now()
err := i.Destroy()
+ lgr := lgr.WithField("Duration", time.Since(t0))
if err != nil {
lgr.WithError(err).Error("error destroying existing instance")
} else {
defer t.destroyTestInstance()
bootDeadline := time.Now().Add(t.TimeoutBooting)
- initCommand := worker.TagVerifier{nil, t.secret}.InitCommand()
+ initCommand := worker.TagVerifier{Instance: nil, Secret: t.secret, ReportVerified: nil}.InitCommand() + "\n" + t.InstanceInitCommand
t.Logger.WithFields(logrus.Fields{
"InstanceType": t.InstanceType.Name,
"Tags": tags,
"InitCommand": initCommand,
}).Info("creating instance")
+ t0 := time.Now()
inst, err := t.is.Create(t.InstanceType, t.ImageID, tags, initCommand, t.SSHKey.PublicKey())
+ lgrC := t.Logger.WithField("Duration", time.Since(t0))
if err != nil {
// Create() might have failed due to a bug or network
// error even though the creation was successful, so
// it's safer to wait a bit for an instance to appear.
deferredError = true
- t.Logger.WithError(err).Error("error creating test instance")
+ lgrC.WithError(err).Error("error creating test instance")
t.Logger.WithField("Deadline", bootDeadline).Info("waiting for instance to appear anyway, in case the Create response was incorrect")
for err = t.refreshTestInstance(); err != nil; err = t.refreshTestInstance() {
if time.Now().After(bootDeadline) {
t.Logger.Error("timed out")
return false
- } else {
- t.sleepSyncInterval()
}
+ t.sleepSyncInterval()
}
t.Logger.WithField("Instance", t.testInstance.ID()).Info("new instance appeared")
t.showLoginInfo()
} else {
// Create() succeeded. Make sure the new instance
// appears right away in the Instances() list.
- t.Logger.WithField("Instance", inst.ID()).Info("created instance")
- t.testInstance = &worker.TagVerifier{inst, t.secret}
+ lgrC.WithField("Instance", inst.ID()).Info("created instance")
+ t.testInstance = &worker.TagVerifier{Instance: inst, Secret: t.secret, ReportVerified: nil}
t.showLoginInfo()
err = t.refreshTestInstance()
if err == errTestInstanceNotFound {
"Instance": i.ID(),
"Address": i.Address(),
}).Info("found our instance in returned list")
- t.testInstance = &worker.TagVerifier{i, t.secret}
+ t.testInstance = &worker.TagVerifier{Instance: i, Secret: t.secret, ReportVerified: nil}
if !t.showedLoginInfo {
t.showLoginInfo()
}
func (t *tester) getInstances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
var ret []cloud.Instance
t.Logger.WithField("FilterTags", tags).Info("getting instance list")
+ t0 := time.Now()
insts, err := t.is.Instances(tags)
if err != nil {
return nil, err
}
- t.Logger.WithField("N", len(insts)).Info("got instance list")
+ t.Logger.WithFields(logrus.Fields{
+ "Duration": time.Since(t0),
+ "N": len(insts),
+ }).Info("got instance list")
for _, i := range insts {
if i.Tags()[t.TagKeyPrefix+"InstanceSetID"] == string(t.SetID) {
ret = append(ret, i)
// current address.
func (t *tester) updateExecutor() {
if t.executor == nil {
- t.executor = ssh_executor.New(t.testInstance)
+ t.executor = sshexecutor.New(t.testInstance)
t.executor.SetTargetPort(t.SSHPort)
t.executor.SetSigners(t.SSHKey)
} else {
t.Logger.WithFields(logrus.Fields{
"Command": cmd,
}).Info("executing remote command")
+ t0 := time.Now()
stdout, stderr, err := t.executor.Execute(nil, cmd, nil)
lgr := t.Logger.WithFields(logrus.Fields{
- "Command": cmd,
- "stdout": string(stdout),
- "stderr": string(stderr),
+ "Duration": time.Since(t0),
+ "Command": cmd,
+ "stdout": string(stdout),
+ "stderr": string(stderr),
})
if err != nil {
lgr.WithError(err).Info("remote command failed")
return true
}
for {
- t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroying instance")
+ lgr := t.Logger.WithField("Instance", t.testInstance.ID())
+ lgr.Info("destroying instance")
+ t0 := time.Now()
+
err := t.testInstance.Destroy()
+ lgrDur := lgr.WithField("Duration", time.Since(t0))
if err != nil {
- t.Logger.WithError(err).WithField("Instance", t.testInstance.ID()).Error("error destroying instance")
+ lgrDur.WithError(err).Error("error destroying instance")
} else {
- t.Logger.WithField("Instance", t.testInstance.ID()).Info("destroyed instance")
+ lgrDur.Info("destroyed instance")
}
+
err = t.refreshTestInstance()
if err == errTestInstanceNotFound {
- t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance no longer appears in list")
+ lgr.Info("instance no longer appears in list")
t.testInstance = nil
return true
} else if err == nil {
- t.Logger.WithField("Instance", t.testInstance.ID()).Info("instance still exists after calling Destroy")
+ lgr.Info("instance still exists after calling Destroy")
t.sleepSyncInterval()
continue
} else {