X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/804ed25e843c38c4a5bf381f70dbaa0a61072a86..4257184a0fd276af7e1741dda8a7468a30b4a9c6:/lib/dispatchcloud/worker/pool_test.go diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go index 60e21b716b..4b87ce5031 100644 --- a/lib/dispatchcloud/worker/pool_test.go +++ b/lib/dispatchcloud/worker/pool_test.go @@ -12,8 +12,8 @@ import ( "git.curoverse.com/arvados.git/lib/cloud" "git.curoverse.com/arvados.git/lib/dispatchcloud/test" "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.curoverse.com/arvados.git/sdk/go/ctxlog" "github.com/prometheus/client_golang/prometheus" - "github.com/sirupsen/logrus" check "gopkg.in/check.v1" ) @@ -33,10 +33,6 @@ var less = &lessChecker{&check.CheckerInfo{Name: "less", Params: []string{"obtai type PoolSuite struct{} -func (suite *PoolSuite) SetUpSuite(c *check.C) { - logrus.StandardLogger().SetLevel(logrus.DebugLevel) -} - func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { type1 := test.InstanceType(1) type2 := test.InstanceType(2) @@ -67,9 +63,10 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { } } - logger := logrus.StandardLogger() + logger := ctxlog.TestLogger(c) driver := &test.StubDriver{} - is, err := driver.InstanceSet(nil, "", logger) + instanceSetID := cloud.InstanceSetID("test-instance-set-id") + is, err := driver.InstanceSet(nil, instanceSetID, nil, logger) c.Assert(err, check.IsNil) newExecutor := func(cloud.Instance) Executor { @@ -80,13 +77,14 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { } cluster := &arvados.Cluster{ - Dispatch: arvados.Dispatch{ - MaxProbesPerSecond: 1000, - ProbeInterval: arvados.Duration(time.Millisecond * 10), - }, - CloudVMs: arvados.CloudVMs{ - BootProbeCommand: "true", - SyncInterval: arvados.Duration(time.Millisecond * 10), + Containers: arvados.ContainersConfig{ + CloudVMs: arvados.CloudVMsConfig{ + BootProbeCommand: "true", + MaxProbesPerSecond: 1000, + ProbeInterval: arvados.Duration(time.Millisecond * 10), + SyncInterval: arvados.Duration(time.Millisecond * 10), + TagKeyPrefix: "testprefix:", + }, }, InstanceTypes: arvados.InstanceTypeMap{ type1.Name: type1, @@ -95,7 +93,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { }, } - pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster) + pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), instanceSetID, is, newExecutor, nil, cluster) notify := pool.Subscribe() defer pool.Unsubscribe(notify) pool.Create(type1) @@ -109,11 +107,29 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { pool.SetIdleBehavior(heldInstanceID, IdleBehaviorHold) } } + // Wait for the tags to save to the cloud provider + tagKey := cluster.Containers.CloudVMs.TagKeyPrefix + tagKeyIdleBehavior + deadline := time.Now().Add(time.Second) + for !func() bool { + pool.mtx.RLock() + defer pool.mtx.RUnlock() + for _, wkr := range pool.workers { + if wkr.instType == type2 { + return wkr.instance.Tags()[tagKey] == string(IdleBehaviorHold) + } + } + return false + }() { + if time.Now().After(deadline) { + c.Fatal("timeout") + } + time.Sleep(time.Millisecond * 10) + } pool.Stop() c.Log("------- starting new pool, waiting to recover state") - pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster) + pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), instanceSetID, is, newExecutor, nil, cluster) notify2 := pool2.Subscribe() defer pool2.Unsubscribe(notify2) waitForIdle(pool2, notify2) @@ -129,14 +145,18 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) { } func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) { - lameInstanceSet := &test.LameInstanceSet{Hold: make(chan bool)} + logger := ctxlog.TestLogger(c) + driver := test.StubDriver{HoldCloudOps: true} + instanceSet, err := driver.InstanceSet(nil, "test-instance-set-id", nil, logger) + c.Assert(err, check.IsNil) + type1 := arvados.InstanceType{Name: "a1s", ProviderType: "a1.small", VCPUs: 1, RAM: 1 * GiB, Price: .01} type2 := arvados.InstanceType{Name: "a2m", ProviderType: "a2.medium", VCPUs: 2, RAM: 2 * GiB, Price: .02} type3 := arvados.InstanceType{Name: "a2l", ProviderType: "a2.large", VCPUs: 4, RAM: 4 * GiB, Price: .04} pool := &Pool{ - logger: logrus.StandardLogger(), + logger: logger, newExecutor: func(cloud.Instance) Executor { return stubExecutor{} }, - instanceSet: &throttledInstanceSet{InstanceSet: lameInstanceSet}, + instanceSet: &throttledInstanceSet{InstanceSet: instanceSet}, instanceTypes: arvados.InstanceTypeMap{ type1.Name: type1, type2.Name: type2, @@ -160,7 +180,7 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) { c.Check(pool.Unallocated()[type3], check.Equals, 1) // Unblock the pending Create calls. - go lameInstanceSet.Release(4) + go driver.ReleaseCloudOps(4) // Wait for each instance to either return from its Create // call, or show up in a poll. @@ -174,7 +194,7 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) { ivs := suite.instancesByType(pool, type3) c.Assert(ivs, check.HasLen, 1) type3instanceID := ivs[0].Instance - err := pool.SetIdleBehavior(type3instanceID, IdleBehaviorHold) + err = pool.SetIdleBehavior(type3instanceID, IdleBehaviorHold) c.Check(err, check.IsNil) // Check admin-hold behavior: refuse to shutdown, and don't @@ -240,7 +260,7 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) { // if a node still appears in the provider list after a // previous attempt, so there might be more than 4 Destroy // calls to unblock. - go lameInstanceSet.Release(4444) + go driver.ReleaseCloudOps(4444) // Sync until all instances disappear from the provider list. suite.wait(c, pool, notify, func() bool {