X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2f66d4cc05e9442a9bb69969744d0750a02a1ed4..042f47a2c6b5f3db80142164b6493c873aca0b26:/lib/dispatchcloud/scheduler/sync_test.go
diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go
index 305ab9e04e..a3ff0636e1 100644
--- a/lib/dispatchcloud/scheduler/sync_test.go
+++ b/lib/dispatchcloud/scheduler/sync_test.go
@@ -48,9 +48,78 @@ func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) {
 	ents, _ := queue.Entries()
 	c.Check(ents, check.HasLen, 1)
 
-	sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+	sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
 	sch.sync()
 
 	ents, _ = queue.Entries()
 	c.Check(ents, check.HasLen, 0)
 }
+
+// TestCancelOrphanedContainers verifies that sync() leaves a
+// priority-0 Running container alone while the pool reports a
+// worker in "unknown" state, and cancels and forgets it once the
+// pool no longer reports such a worker.
+func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+	// The stub pool reports one unallocated and one
+	// unknown-state instance of test.InstanceType(1).
+	pool := stubPool{
+		unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+		unknown: map[arvados.InstanceType]int{test.InstanceType(1): 1},
+	}
+
+	// The queue holds a single Running container with priority 0.
+	orphan := arvados.Container{
+		UUID:     test.ContainerUUID(1),
+		Priority: 0,
+		State:    arvados.ContainerStateRunning,
+		RuntimeConstraints: arvados.RuntimeConstraints{
+			VCPUs: 1,
+			RAM:   1 << 30,
+		},
+	}
+	queue := test.Queue{
+		ChooseType: chooseType,
+		Containers: []arvados.Container{orphan},
+	}
+	queue.Update()
+	entries, _ := queue.Entries()
+	c.Check(entries, check.HasLen, 1)
+
+	sched := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
+
+	// While the pool still has a worker in "unknown" state, the
+	// container might be running there, so sync() must not
+	// cancel it.
+	//
+	// Cancel+forget is asynchronous and takes several sync()
+	// calls, so ten sync-and-sleep rounds cannot prove that no
+	// cancellation is in progress. The goroutines started by
+	// sync() only touch stubs and finish quickly, so this is
+	// good enough in practice; a broken scheduler could still
+	// pass this check occasionally.
+	for round := 1; round <= 10; round++ {
+		sched.sync()
+		time.Sleep(time.Millisecond)
+	}
+	entries, _ = queue.Entries()
+	c.Check(entries, check.HasLen, 1)
+	c.Check(entries[test.ContainerUUID(1)].Container.State, check.Equals, arvados.ContainerStateRunning)
+
+	// Once the "unknown" worker is gone, sync() should cancel
+	// and forget the container. This is also asynchronous, so
+	// keep calling sync() until the queue is empty or a
+	// one-second deadline passes; with stubs it normally
+	// finishes much sooner.
+	pool.unknown = nil
+	deadline := time.Now().Add(time.Second)
+	for {
+		sched.sync()
+		entries, _ = queue.Entries()
+		if len(entries) == 0 || time.Now().After(deadline) {
+			break
+		}
+		time.Sleep(time.Millisecond)
+	}
+	c.Check(entries, check.HasLen, 0)
+}