Merge branch '15964-fix-docs' refs #15964
[arvados.git] / lib / dispatchcloud / scheduler / run_queue_test.go
index 7dd6866c0f389e51a393300f9235053159582d51..992edddfba6370198a16def5a6b57aed18575aa4 100644 (file)
@@ -5,12 +5,14 @@
 package scheduler
 
 import (
+       "context"
        "sync"
        "time"
 
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
-       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/test"
+       "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -34,6 +36,7 @@ type stubPool struct {
        notify    <-chan struct{}
        unalloc   map[arvados.InstanceType]int // idle+booting+unknown
        idle      map[arvados.InstanceType]int
+       unknown   map[arvados.InstanceType]int
        running   map[string]time.Time
        atQuota   bool
        canCreate int
@@ -60,7 +63,7 @@ func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
        defer p.Unlock()
        r := map[arvados.InstanceType]int{}
        for it, n := range p.unalloc {
-               r[it] = n
+               r[it] = n - p.unknown[it]
        }
        return r
 }
@@ -75,10 +78,14 @@ func (p *stubPool) Create(it arvados.InstanceType) bool {
        p.unalloc[it]++
        return true
 }
-func (p *stubPool) KillContainer(uuid string) {
+func (p *stubPool) ForgetContainer(uuid string) {
+}
+func (p *stubPool) KillContainer(uuid, reason string) bool { // stub: reason is accepted but not used in this body
        p.Lock()
        defer p.Unlock()
-       delete(p.running, uuid)
+       defer delete(p.running, uuid) // deferred: the entry is removed only after the return value below has been computed, and (LIFO) before Unlock
+       t, ok := p.running[uuid]
+       return ok && t.IsZero() // true only when uuid was present in running with a zero time.Time value
 }
 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
        p.shutdowns++
@@ -91,6 +98,7 @@ func (p *stubPool) CountWorkers() map[worker.State]int {
                worker.StateBooting: len(p.unalloc) - len(p.idle),
                worker.StateIdle:    len(p.idle),
                worker.StateRunning: len(p.running),
+               worker.StateUnknown: len(p.unknown),
        }
 }
 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
@@ -120,6 +128,7 @@ type SchedulerSuite struct{}
 // immediately. Don't try to create any other nodes after the failed
 // create.
 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        queue := test.Queue{
                ChooseType: chooseType,
                Containers: []arvados.Container{
@@ -174,7 +183,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
                running:   map[string]time.Time{},
                canCreate: 0,
        }
-       New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
        c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
        c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
        c.Check(pool.running, check.HasLen, 1)
@@ -186,6 +195,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 // If Create() fails, shutdown some nodes, and don't call Create()
 // again.  Don't call Create() at all if AtQuota() is true.
 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        for quota := 0; quota < 2; quota++ {
                c.Logf("quota=%d", quota)
                shouldCreate := []arvados.InstanceType{}
@@ -229,7 +239,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
                        starts:    []string{},
                        canCreate: 0,
                }
-               New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+               New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
                c.Check(pool.creates, check.DeepEquals, shouldCreate)
                c.Check(pool.starts, check.DeepEquals, []string{})
                c.Check(pool.shutdowns, check.Not(check.Equals), 0)
@@ -239,6 +249,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 // Start lower-priority containers while waiting for new/existing
 // workers to come up for higher-priority containers.
 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        pool := stubPool{
                unalloc: map[arvados.InstanceType]int{
                        test.InstanceType(1): 2,
@@ -317,7 +328,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
                },
        }
        queue.Update()
-       New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
        c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
        c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
        running := map[string]bool{}
@@ -330,3 +341,40 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
        }
        c.Check(running, check.DeepEquals, map[string]bool{uuids[3]: false, uuids[6]: false})
 }
+
+func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       pool := stubPool{
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 0,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 0,
+               },
+               running: map[string]time.Time{ // the pool reports ContainerUUID(2) as running; the queue below does not contain it
+                       test.ContainerUUID(2): time.Time{},
+               },
+       }
+       queue := test.Queue{
+               ChooseType: chooseType,
+               Containers: []arvados.Container{
+                       {
+                               // The only queued container. NOTE(review): this was annotated "create a new worker", but only sync() is called below, not runQueue() -- confirm intent.
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 1,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+       sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+       c.Check(pool.running, check.HasLen, 1) // precondition: the unqueued container is still listed before sync()
+       sch.sync()
+       for deadline := time.Now().Add(time.Second); len(pool.Running()) > 0 && time.Now().Before(deadline); time.Sleep(time.Millisecond) { // poll (1ms steps, up to 1s) until the pool reports nothing running
+       }
+       c.Check(pool.Running(), check.HasLen, 0) // presumably sync() triggers the kill of the unqueued container -- TODO confirm against sync()
+}