X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7f4da9388e3d5ec8f38f6d6408916d1d46dfb10f..25367eb306ce9816762f1dc06ff3ab754d924502:/lib/dispatchcloud/scheduler/run_queue_test.go

diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 6d70364989..5b5fa960a1 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -244,15 +244,81 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 			starts:    []string{},
 			canCreate: 0,
 		}
-		New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue()
+		sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
+		sch.runQueue()
+		sch.sync()
+		sch.runQueue()
+		sch.sync()
 		c.Check(pool.creates, check.DeepEquals, shouldCreate)
 		if len(shouldCreate) == 0 {
 			c.Check(pool.starts, check.DeepEquals, []string{})
-			c.Check(pool.shutdowns, check.Not(check.Equals), 0)
 		} else {
 			c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
-			c.Check(pool.shutdowns, check.Equals, 0)
 		}
+		c.Check(pool.shutdowns, check.Equals, 3-quota)
+		c.Check(queue.StateChanges(), check.DeepEquals, []test.QueueStateChange{
+			{UUID: "zzzzz-dz642-000000000000003", From: "Locked", To: "Queued"},
+			{UUID: "zzzzz-dz642-000000000000002", From: "Locked", To: "Queued"},
+		})
+	}
+}
+
+// Don't flap lock/unlock when equal-priority containers compete for
+// limited workers.
+//
+// (Unless we use FirstSeenAt as a secondary sort key, each runQueue()
+// tends to choose a different one of the equal-priority containers as
+// the "first" one that should be locked, and unlock the one it chose
+// last time. This generates logging noise, and fails containers by
+// reaching MaxDispatchAttempts quickly.)
+func (*SchedulerSuite) TestEqualPriorityContainers(c *check.C) {
+	logger := ctxlog.TestLogger(c)
+	ctx := ctxlog.Context(context.Background(), logger)
+	queue := test.Queue{
+		ChooseType: chooseType,
+		Logger:     logger,
+	}
+	for i := 0; i < 8; i++ {
+		queue.Containers = append(queue.Containers, arvados.Container{
+			UUID:     test.ContainerUUID(i),
+			Priority: 333,
+			State:    arvados.ContainerStateQueued,
+			RuntimeConstraints: arvados.RuntimeConstraints{
+				VCPUs: 3,
+				RAM:   3 << 30,
+			},
+		})
+	}
+	queue.Update()
+	pool := stubPool{
+		quota: 2,
+		unalloc: map[arvados.InstanceType]int{
+			test.InstanceType(3): 1,
+		},
+		idle: map[arvados.InstanceType]int{
+			test.InstanceType(3): 1,
+		},
+		running:   map[string]time.Time{},
+		creates:   []arvados.InstanceType{},
+		starts:    []string{},
+		canCreate: 1,
+	}
+	sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
+	for i := 0; i < 30; i++ {
+		sch.runQueue()
+		sch.sync()
+		time.Sleep(time.Millisecond)
+	}
+	c.Check(pool.shutdowns, check.Equals, 0)
+	c.Check(pool.starts, check.HasLen, 1)
+	unlocked := map[string]int{}
+	for _, chg := range queue.StateChanges() {
+		if chg.To == arvados.ContainerStateQueued {
+			unlocked[chg.UUID]++
+		}
+	}
+	for uuid, count := range unlocked {
+		c.Check(count, check.Equals, 1, check.Commentf("%s", uuid))
 	}
 }
 
@@ -391,15 +457,16 @@ func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) {
 	c.Check(pool.Running(), check.HasLen, 0)
 }
 
-func (*SchedulerSuite) TestContainersAllocatedNotStartedMetric(c *check.C) {
+func (*SchedulerSuite) TestContainersMetrics(c *check.C) {
 	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	queue := test.Queue{
 		ChooseType: chooseType,
 		Containers: []arvados.Container{
 			{
-				UUID:     test.ContainerUUID(1),
-				Priority: 1,
-				State:    arvados.ContainerStateLocked,
+				UUID:      test.ContainerUUID(1),
+				Priority:  1,
+				State:     arvados.ContainerStateLocked,
+				CreatedAt: time.Now().Add(-10 * time.Second),
 				RuntimeConstraints: arvados.RuntimeConstraints{
 					VCPUs: 1,
 					RAM:   1 << 30,
@@ -417,9 +484,11 @@ func (*SchedulerSuite) TestContainersAllocatedNotStartedMetric(c *check.C) {
 	}
 	sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
 	sch.runQueue()
+	sch.updateMetrics()
 
 	c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 1)
 	c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 0)
+	c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10)
 
 	// Create a pool without workers. The queued container will not be started, and the
 	// 'over quota' metric will be 1 because no workers are available and canCreate defaults
@@ -427,7 +496,40 @@ func (*SchedulerSuite) TestContainersAllocatedNotStartedMetric(c *check.C) {
 	pool = stubPool{}
 	sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
 	sch.runQueue()
+	sch.updateMetrics()
 
 	c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 0)
 	c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 1)
+	c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10)
+
+	// Reset the queue, and create a pool with an idle worker. The queued
+	// container will be started immediately and mLongestWaitTimeSinceQueue
+	// should be zero.
+	queue = test.Queue{
+		ChooseType: chooseType,
+		Containers: []arvados.Container{
+			{
+				UUID:      test.ContainerUUID(1),
+				Priority:  1,
+				State:     arvados.ContainerStateLocked,
+				CreatedAt: time.Now().Add(-10 * time.Second),
+				RuntimeConstraints: arvados.RuntimeConstraints{
+					VCPUs: 1,
+					RAM:   1 << 30,
+				},
+			},
+		},
+	}
+	queue.Update()
+
+	pool = stubPool{
+		idle:    map[arvados.InstanceType]int{test.InstanceType(1): 1},
+		unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+		running: map[string]time.Time{},
+	}
+	sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
+	sch.runQueue()
+	sch.updateMetrics()
+
+	c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 0)
 }
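
The new TestEqualPriorityContainers above exercises a tie-breaking property: with eight equal-priority containers and room for only one, repeated runQueue()/sync() passes must keep choosing the same container instead of unlocking last round's pick. The following is a minimal, self-contained Go sketch of that ordering property only; the queueEnt type and sortRunQueue helper are hypothetical names for illustration, not the dispatcher's actual sorting code, which (per the comment in the diff) uses FirstSeenAt as the secondary sort key.

package main

import (
	"fmt"
	"sort"
	"time"
)

// queueEnt is a hypothetical stand-in for a run-queue entry.
type queueEnt struct {
	UUID        string
	Priority    int
	FirstSeenAt time.Time
}

// sortRunQueue orders candidates by descending priority, breaking ties by
// the time each container was first seen, so repeated scheduling passes
// pick the same "first" container instead of flapping between equals.
func sortRunQueue(sorted []queueEnt) {
	sort.SliceStable(sorted, func(i, j int) bool {
		if sorted[i].Priority != sorted[j].Priority {
			return sorted[i].Priority > sorted[j].Priority
		}
		return sorted[i].FirstSeenAt.Before(sorted[j].FirstSeenAt)
	})
}

func main() {
	now := time.Now()
	q := []queueEnt{
		{UUID: "c2", Priority: 333, FirstSeenAt: now.Add(2 * time.Second)},
		{UUID: "c1", Priority: 333, FirstSeenAt: now.Add(1 * time.Second)},
		{UUID: "c3", Priority: 500, FirstSeenAt: now.Add(3 * time.Second)},
	}
	sortRunQueue(q)
	for _, ent := range q {
		fmt.Println(ent.UUID) // c3, c1, c2 -- stable across repeated calls
	}
}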
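The TestContainersMetrics changes assert that mLongestWaitTimeSinceQueue reports about 10 seconds while a container created 10 seconds ago is still waiting, and 0 once everything has started. Below is a minimal sketch of how such a gauge value could be derived from CreatedAt; the container type and longestWaitSinceQueue helper are assumptions for illustration, not the scheduler's actual updateMetrics() implementation.

package main

import (
	"fmt"
	"time"
)

// container is a simplified stand-in for arvados.Container.
type container struct {
	UUID      string
	State     string // "Queued", "Locked", "Running", ...
	CreatedAt time.Time
}

// longestWaitSinceQueue returns the age, in seconds, of the oldest container
// that is still waiting (queued or locked but not yet running). If nothing
// is waiting, it returns 0, matching the final assertion in the test above.
func longestWaitSinceQueue(containers []container, now time.Time) float64 {
	var longest float64
	for _, ctr := range containers {
		if ctr.State != "Queued" && ctr.State != "Locked" {
			continue
		}
		if wait := now.Sub(ctr.CreatedAt).Seconds(); wait > longest {
			longest = wait
		}
	}
	return longest
}

func main() {
	now := time.Now()
	queue := []container{
		{UUID: "zzzzz-dz642-000000000000001", State: "Locked", CreatedAt: now.Add(-10 * time.Second)},
	}
	fmt.Printf("%.0f\n", longestWaitSinceQueue(queue, now)) // 10: still waiting after 10s
	queue[0].State = "Running"
	fmt.Printf("%.0f\n", longestWaitSinceQueue(queue, now)) // 0: nothing left waiting
}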