From cd68182c48ec70804a2acbf95331ec25629a9e28 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 17 Dec 2018 23:37:47 -0500 Subject: [PATCH] 14360: Avoid overreporting instances during Create/List race. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/dispatchcloud/worker/pool.go | 55 +++++++++++++++++++-------- lib/dispatchcloud/worker/pool_test.go | 21 ++++------ lib/dispatchcloud/worker/worker.go | 1 + 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index a43b96ed82..722d4e918c 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -121,7 +121,7 @@ type Pool struct { // private state subscribers map[<-chan struct{}]chan<- struct{} - creating map[arvados.InstanceType]int // goroutines waiting for (InstanceSet)Create to return + creating map[arvados.InstanceType][]time.Time // start times of unfinished (InstanceSet)Create calls workers map[cloud.InstanceID]*worker loaded bool // loaded list of instances from InstanceSet at least once exited map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called @@ -171,25 +171,41 @@ func (wp *Pool) Unsubscribe(ch <-chan struct{}) { // Unallocated returns the number of unallocated (creating + booting + // idle + unknown) workers for each instance type. -// -// The returned counts should be interpreted as upper bounds, rather -// than exact counts: they are sometimes artificially high when a -// newly created instance appears in the driver's Instances() list -// before the Create() call returns. func (wp *Pool) Unallocated() map[arvados.InstanceType]int { wp.setupOnce.Do(wp.setup) wp.mtx.RLock() defer wp.mtx.RUnlock() - u := map[arvados.InstanceType]int{} - for it, c := range wp.creating { - u[it] = c + unalloc := map[arvados.InstanceType]int{} + creating := map[arvados.InstanceType]int{} + for it, times := range wp.creating { + creating[it] = len(times) } for _, wkr := range wp.workers { - if wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown { - u[wkr.instType]++ + if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) { + continue } + it := wkr.instType + unalloc[it]++ + if wkr.state == StateUnknown && creating[it] > 0 && wkr.appeared.After(wp.creating[it][0]) { + // If up to N new workers appear in + // Instances() while we are waiting for N + // Create() calls to complete, we assume we're + // just seeing a race between Instances() and + // Create() responses. + // + // The other common reason why nodes have + // state==Unknown is that they appeared at + // startup, before any Create calls. They + // don't match the above timing condition, so + // we never mistakenly attribute them to + // pending Create calls. + creating[it]-- + } + } + for it, c := range creating { + unalloc[it] += c } - return u + return unalloc } // Create a new instance with the given type, and add it to the worker @@ -204,13 +220,21 @@ func (wp *Pool) Create(it arvados.InstanceType) error { return wp.atQuotaErr } tags := cloud.InstanceTags{tagKeyInstanceType: it.Name} - wp.creating[it]++ + now := time.Now() + wp.creating[it] = append(wp.creating[it], now) go func() { defer wp.notify() inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil) wp.mtx.Lock() defer wp.mtx.Unlock() - wp.creating[it]-- + // Remove our timestamp marker from wp.creating + for i, t := range wp.creating[it] { + if t == now { + copy(wp.creating[it][i:], wp.creating[it][i+1:]) + wp.creating[it] = wp.creating[it][:len(wp.creating[it])-1] + break + } + } if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() { wp.atQuotaErr = err wp.atQuotaUntil = time.Now().Add(time.Minute) @@ -266,6 +290,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi state: initialState, instance: inst, instType: it, + appeared: now, probed: now, busy: now, updated: now, @@ -579,7 +604,7 @@ func (wp *Pool) Instances() []InstanceView { } func (wp *Pool) setup() { - wp.creating = map[arvados.InstanceType]int{} + wp.creating = map[arvados.InstanceType][]time.Time{} wp.exited = map[string]time.Time{} wp.workers = map[cloud.InstanceID]*worker{} wp.subscribers = map[<-chan struct{}]chan<- struct{}{} diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go index f8667e06cd..3867e2c63e 100644 --- a/lib/dispatchcloud/worker/pool_test.go +++ b/lib/dispatchcloud/worker/pool_test.go @@ -68,23 +68,16 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) { pool.Create(type2) c.Check(pool.Unallocated()[type1], check.Equals, 1) c.Check(pool.Unallocated()[type2], check.Equals, 2) - // Unblock the pending Create calls and (before calling Sync!) - // wait for the pool to process the returned instances. + + // Unblock the pending Create calls. go lameInstanceSet.Release(3) - suite.wait(c, pool, notify, func() bool { - list, err := lameInstanceSet.Instances(nil) - return err == nil && len(list) == 3 - }) - c.Check(pool.Unallocated()[type1], check.Equals, 1) - c.Check(pool.Unallocated()[type2], check.Equals, 2) - pool.getInstancesAndSync() - // Returned counts can be temporarily higher than 1 and 2 if - // poll ran before Create() returned. - c.Check(pool.Unallocated()[type1], check.Not(less), 1) - c.Check(pool.Unallocated()[type2], check.Not(less), 2) + // Wait for each instance to either return from its Create + // call, or show up in a poll. suite.wait(c, pool, notify, func() bool { - return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 2 + pool.mtx.RLock() + defer pool.mtx.RUnlock() + return len(pool.workers) == 3 }) c.Check(pool.Shutdown(type2), check.Equals, true) diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go index 85104a13aa..a0a61c1597 100644 --- a/lib/dispatchcloud/worker/worker.go +++ b/lib/dispatchcloud/worker/worker.go @@ -64,6 +64,7 @@ type worker struct { instType arvados.InstanceType vcpus int64 memory int64 + appeared time.Time probed time.Time updated time.Time busy time.Time -- 2.30.2