X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e231fef37079916d0dc6babf93d669d474598ced..bdeca9098f356a4b2b088a3cf4c9276bb6cd5bdb:/lib/dispatchcloud/scheduler/run_queue_test.go

diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 4296a1364c..530eb5db93 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -9,10 +9,10 @@ import (
 	"sync"
 	"time"
 
-	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
-	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
-	"git.curoverse.com/arvados.git/sdk/go/arvados"
-	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
+	"git.arvados.org/arvados.git/lib/dispatchcloud/test"
+	"git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 
 	check "gopkg.in/check.v1"
 )
@@ -36,8 +36,9 @@ type stubPool struct {
 	notify    <-chan struct{}
 	unalloc   map[arvados.InstanceType]int // idle+booting+unknown
 	idle      map[arvados.InstanceType]int
+	unknown   map[arvados.InstanceType]int
 	running   map[string]time.Time
-	atQuota   bool
+	quota     int
 	canCreate int
 	creates   []arvados.InstanceType
 	starts    []string
@@ -45,7 +46,11 @@ type stubPool struct {
 	sync.Mutex
 }
 
-func (p *stubPool) AtQuota() bool { return p.atQuota }
+func (p *stubPool) AtQuota() bool {
+	p.Lock()
+	defer p.Unlock()
+	return len(p.unalloc)+len(p.running)+len(p.unknown) >= p.quota
+}
 func (p *stubPool) Subscribe() <-chan struct{}  { return p.notify }
 func (p *stubPool) Unsubscribe(<-chan struct{}) {}
 func (p *stubPool) Running() map[string]time.Time {
@@ -62,7 +67,7 @@ func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
 	defer p.Unlock()
 	r := map[arvados.InstanceType]int{}
 	for it, n := range p.unalloc {
-		r[it] = n
+		r[it] = n - p.unknown[it]
 	}
 	return r
 }
@@ -77,10 +82,14 @@ func (p *stubPool) Create(it arvados.InstanceType) bool {
 	p.unalloc[it]++
 	return true
 }
-func (p *stubPool) KillContainer(uuid string) {
+func (p *stubPool) ForgetContainer(uuid string) {
+}
+func (p *stubPool) KillContainer(uuid, reason string) bool {
 	p.Lock()
 	defer p.Unlock()
-	delete(p.running, uuid)
+	defer delete(p.running, uuid)
+	t, ok := p.running[uuid]
+	return ok && t.IsZero()
 }
 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
 	p.shutdowns++
@@ -93,6 +102,7 @@ func (p *stubPool) CountWorkers() map[worker.State]int {
 		worker.StateBooting: len(p.unalloc) - len(p.idle),
 		worker.StateIdle:    len(p.idle),
 		worker.StateRunning: len(p.running),
+		worker.StateUnknown: len(p.unknown),
 	}
 }
 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
@@ -116,11 +126,8 @@ var _ = check.Suite(&SchedulerSuite{})
 
 type SchedulerSuite struct{}
 
-// Assign priority=4 container to idle node. Create a new instance for
-// the priority=3 container. Don't try to start any priority<3
-// containers because priority=3 container didn't start
-// immediately. Don't try to create any other nodes after the failed
-// create.
+// Assign priority=4 container to idle node. Create new instances for
+// the priority=3, 2, 1 containers.
 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	queue := test.Queue{
@@ -166,6 +173,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 	}
 	queue.Update()
 	pool := stubPool{
+		quota: 1000,
 		unalloc: map[arvados.InstanceType]int{
 			test.InstanceType(1): 1,
 			test.InstanceType(2): 2,
@@ -178,7 +186,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 		canCreate: 0,
 	}
 	New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
-	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
+	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1), test.InstanceType(1), test.InstanceType(1)})
 	c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
 	c.Check(pool.running, check.HasLen, 1)
 	for uuid := range pool.running {
@@ -186,14 +194,14 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 	}
 }
 
-// If Create() fails, shutdown some nodes, and don't call Create()
-// again. Don't call Create() at all if AtQuota() is true.
+// If pool.AtQuota() is true, shutdown some unalloc nodes, and don't
+// call Create().
 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
-	for quota := 0; quota < 2; quota++ {
+	for quota := 1; quota < 3; quota++ {
 		c.Logf("quota=%d", quota)
 		shouldCreate := []arvados.InstanceType{}
-		for i := 0; i < quota; i++ {
+		for i := 1; i < quota; i++ {
 			shouldCreate = append(shouldCreate, test.InstanceType(3))
 		}
 		queue := test.Queue{
@@ -221,7 +229,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 		}
 		queue.Update()
 		pool := stubPool{
-			atQuota: quota == 0,
+			quota: quota,
 			unalloc: map[arvados.InstanceType]int{
 				test.InstanceType(2): 2,
 			},
@@ -235,8 +243,13 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 		}
 		New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
 		c.Check(pool.creates, check.DeepEquals, shouldCreate)
-		c.Check(pool.starts, check.DeepEquals, []string{})
-		c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+		if len(shouldCreate) == 0 {
+			c.Check(pool.starts, check.DeepEquals, []string{})
+			c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+		} else {
+			c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
+			c.Check(pool.shutdowns, check.Equals, 0)
+		}
 	}
 }
 
@@ -245,6 +258,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
 	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	pool := stubPool{
+		quota: 1000,
 		unalloc: map[arvados.InstanceType]int{
 			test.InstanceType(1): 2,
 			test.InstanceType(2): 2,
@@ -335,3 +349,41 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
 	}
 	c.Check(running, check.DeepEquals, map[string]bool{uuids[3]: false, uuids[6]: false})
 }
+
+func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+	pool := stubPool{
+		quota: 1000,
+		unalloc: map[arvados.InstanceType]int{
+			test.InstanceType(2): 0,
+		},
+		idle: map[arvados.InstanceType]int{
+			test.InstanceType(2): 0,
+		},
+		running: map[string]time.Time{
+			test.ContainerUUID(2): {},
+		},
+	}
+	queue := test.Queue{
+		ChooseType: chooseType,
+		Containers: []arvados.Container{
+			{
+				// create a new worker
+				UUID:     test.ContainerUUID(1),
+				Priority: 1,
+				State:    arvados.ContainerStateLocked,
+				RuntimeConstraints: arvados.RuntimeConstraints{
+					VCPUs: 1,
+					RAM:   1 << 30,
+				},
+			},
+		},
+	}
+	queue.Update()
+	sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+	c.Check(pool.running, check.HasLen, 1)
+	sch.sync()
+	for deadline := time.Now().Add(time.Second); len(pool.Running()) > 0 && time.Now().Before(deadline); time.Sleep(time.Millisecond) {
+	}
+	c.Check(pool.Running(), check.HasLen, 0)
+}
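
A note on the AtQuota() change above: the diff replaces stubPool's stored atQuota flag with a quota threshold, and AtQuota() now compares current instance counts against it under the lock. That is what lets the rewritten TestShutdownAtQuota sweep several quota values with a single stub. The following is a minimal, self-contained Go sketch of that pattern, not code from the Arvados tree; the plain integer counters are simplified stand-ins for the stub's unalloc/running/unknown maps (the stub itself compares the len() of those maps).

// quota_sketch.go — illustrative only: derive AtQuota() from a
// configurable quota and current counts instead of a cached boolean.
package main

import (
	"fmt"
	"sync"
)

// pool is a hypothetical stand-in for stubPool.
type pool struct {
	mu      sync.Mutex
	quota   int // maximum instances the (simulated) cloud allows
	unalloc int // idle + booting instances
	running int // instances with a container assigned
	unknown int // instances reported by the driver but not created by us
}

// AtQuota reports whether the pool has reached its instance quota,
// mirroring the locked read-and-compare the diff adds to the stub.
func (p *pool) AtQuota() bool {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.unalloc+p.running+p.unknown >= p.quota
}

func main() {
	p := &pool{quota: 2, unalloc: 1, running: 1}
	fmt.Println(p.AtQuota()) // true: 2 instances count against a quota of 2
	p.quota = 1000
	fmt.Println(p.AtQuota()) // false: plenty of headroom left
}

Computing the answer on demand, rather than caching a boolean, also keeps the result correct as instances are created and destroyed during a test.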
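A second note, on the KillContainer signature change: the stub's new KillContainer(uuid, reason string) bool drops the container from its running set either way, but reports true only when the container was tracked and had a zero (not-yet-started) start time. A standalone illustration of the stub's behavior, again not Arvados code:

// kill_sketch.go — illustrative only: the stub's KillContainer contract.
package main

import (
	"fmt"
	"time"
)

type pool struct {
	running map[string]time.Time // container UUID -> start time (zero until started)
}

func (p *pool) KillContainer(uuid, reason string) bool {
	defer delete(p.running, uuid) // forget the container regardless of outcome
	t, ok := p.running[uuid]
	return ok && t.IsZero() // true only for a known, not-yet-started container
}

func main() {
	p := &pool{running: map[string]time.Time{
		"ctr-pending": {},         // tracked, never started
		"ctr-started": time.Now(), // tracked, already running
	}}
	fmt.Println(p.KillContainer("ctr-pending", "test")) // true
	fmt.Println(p.KillContainer("ctr-started", "test")) // false: already started
	fmt.Println(p.KillContainer("ctr-missing", "test")) // false: unknown container
}

The false return for an unknown container is what TestKillNonexistentContainer exercises: sync() observes the failed kill and the container is forgotten rather than left in the running set.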