18102: Fix double-unlock in scheduler.
author     Tom Clegg <tom@curii.com>
           Mon, 6 Sep 2021 18:47:03 +0000 (14:47 -0400)
committer  Tom Clegg <tom@curii.com>
           Mon, 6 Sep 2021 18:47:03 +0000 (14:47 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/run_queue_test.go
lib/dispatchcloud/test/queue.go

index b9d653a821e4b6650d2666e368414df43843e4b8..d2f6d1c2cb1b4eafa49a1421bae6c33eb9cb98d4 100644 (file)
@@ -66,8 +66,7 @@ tryrun:
                                // starve this one by keeping
                                // idle workers alive on different
                                // instance types.
-                               logger.Debug("unlocking: AtQuota and no unalloc workers")
-                               sch.queue.Unlock(ctr.UUID)
+                               logger.Debug("overquota")
                                overquota = sorted[i:]
                                break tryrun
                        } else if logger.Info("creating new instance"); sch.pool.Create(it) {
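
For context: once overquota is set, the tail end of runQueue walks that slice and unlocks every container still in the Locked state, so the Unlock call removed above caused the same container to be unlocked twice (the second attempt failing because the container was already back in Queued). The sketch below paraphrases that downstream overquota handling for orientation only; it is not part of this diff and may differ in detail from the actual run_queue.go:

    if len(overquota) > 0 {
            // Unlock any containers in the over-quota tail that
            // are still locked. After this commit, this is the
            // only place the over-quota container gets unlocked.
            for _, ctr := range overquota {
                    ctr := ctr.Container
                    if ctr.State == arvados.ContainerStateLocked {
                            logger := sch.logger.WithField("ContainerUUID", ctr.UUID)
                            logger.Debug("unlock because pool capacity is used by higher priority containers")
                            if err := sch.queue.Unlock(ctr.UUID); err != nil {
                                    logger.WithError(err).Warn("error unlocking")
                            }
                    }
            }
    }
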
index fd1d0a870b7ac9f34f9d1dd39f250fed62b4a099..c8d45cbd19c697946e9c8d0718a85bf951d5a90f 100644 (file)
@@ -244,15 +244,22 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
                        starts:    []string{},
                        canCreate: 0,
                }
-               New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue()
+               sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
+               sch.runQueue()
+               sch.sync()
+               sch.runQueue()
+               sch.sync()
                c.Check(pool.creates, check.DeepEquals, shouldCreate)
                if len(shouldCreate) == 0 {
                        c.Check(pool.starts, check.DeepEquals, []string{})
-                       c.Check(pool.shutdowns, check.Not(check.Equals), 0)
                } else {
                        c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
-                       c.Check(pool.shutdowns, check.Equals, 0)
                }
+               c.Check(pool.shutdowns, check.Equals, 3-quota)
+               c.Check(queue.StateChanges(), check.DeepEquals, []test.QueueStateChange{
+                       {UUID: "zzzzz-dz642-000000000000003", From: "Locked", To: "Queued"},
+                       {UUID: "zzzzz-dz642-000000000000002", From: "Locked", To: "Queued"},
+               })
        }
 }
 
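The test now drives two full runQueue/sync cycles and pins the exact Lock/Unlock history, which is what makes the regression visible: the pre-fix code unlocked the first over-quota container once in the tryrun loop and again in the overquota handling, so the recorded history would have carried a duplicate entry and the DeepEquals above would fail. A sketch of what the buggy scheduler would have recorded (UUIDs as in the diff; the exact ordering is an assumption):

    // Hypothetical pre-fix history -- the duplicated first entry
    // is the double unlock this commit removes:
    []test.QueueStateChange{
            {UUID: "zzzzz-dz642-000000000000003", From: "Locked", To: "Queued"}, // explicit Unlock in the AtQuota branch
            {UUID: "zzzzz-dz642-000000000000003", From: "Locked", To: "Queued"}, // unlocked again by the overquota handling
            {UUID: "zzzzz-dz642-000000000000002", From: "Locked", To: "Queued"},
    }
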
index 3598ec6da05baf23d3eaed302ec8db603f38e96c..5973d16390ce6c4326df184f74c0fac3505432af 100644 (file)
@@ -26,13 +26,27 @@ type Queue struct {
 
        Logger logrus.FieldLogger
 
-       entries     map[string]container.QueueEnt
-       updTime     time.Time
-       subscribers map[<-chan struct{}]chan struct{}
+       entries      map[string]container.QueueEnt
+       updTime      time.Time
+       subscribers  map[<-chan struct{}]chan struct{}
+       stateChanges []QueueStateChange
 
        mtx sync.Mutex
 }
 
+type QueueStateChange struct {
+       UUID string
+       From arvados.ContainerState
+       To   arvados.ContainerState
+}
+
+       // StateChanges returns all calls to Lock/Unlock/Cancel to date.
+func (q *Queue) StateChanges() []QueueStateChange {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       return q.stateChanges
+}
+
 // Entries returns the containers that were queued when Update was
 // last called.
 func (q *Queue) Entries() (map[string]container.QueueEnt, time.Time) {
@@ -111,6 +125,7 @@ func (q *Queue) notify() {
 // caller must have lock.
 func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
        ent := q.entries[uuid]
+       q.stateChanges = append(q.stateChanges, QueueStateChange{uuid, from, to})
        if ent.Container.State != from {
                return fmt.Errorf("changeState failed: state=%q", ent.Container.State)
        }
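
Note that changeState appends the record before validating the from-state, so even an invalid transition is captured. For a test double that is deliberate: a second Unlock on an already-Queued container still leaves a trace, which is exactly how the DeepEquals check in TestShutdownAtQuota catches a double unlock. A minimal sketch, assuming the test Queue's usual Lock/Unlock methods and state names:

    // Hypothetical illustration, not part of this commit:
    q.Lock(uuid)   // records {uuid, Queued, Locked}
    q.Unlock(uuid) // records {uuid, Locked, Queued}
    q.Unlock(uuid) // returns an error, but is still recorded
    // q.StateChanges() now holds three entries; the duplicate
    // Locked->Queued record exposes the double unlock.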