From cd68182c48ec70804a2acbf95331ec25629a9e28 Mon Sep 17 00:00:00 2001
From: Tom Clegg <tclegg@veritasgenetics.com>
Date: Mon, 17 Dec 2018 23:37:47 -0500
Subject: [PATCH] 14360: Avoid overreporting instances during Create/List race.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>
---
 lib/dispatchcloud/worker/pool.go      | 55 +++++++++++++++++++--------
 lib/dispatchcloud/worker/pool_test.go | 21 ++++------
 lib/dispatchcloud/worker/worker.go    |  1 +
 3 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a43b96ed82..722d4e918c 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -121,7 +121,7 @@ type Pool struct {
 
 	// private state
 	subscribers  map[<-chan struct{}]chan<- struct{}
-	creating     map[arvados.InstanceType]int // goroutines waiting for (InstanceSet)Create to return
+	creating     map[arvados.InstanceType][]time.Time // start times of unfinished (InstanceSet)Create calls
 	workers      map[cloud.InstanceID]*worker
 	loaded       bool                 // loaded list of instances from InstanceSet at least once
 	exited       map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
@@ -171,25 +171,41 @@ func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
 
 // Unallocated returns the number of unallocated (creating + booting +
 // idle + unknown) workers for each instance type.
-//
-// The returned counts should be interpreted as upper bounds, rather
-// than exact counts: they are sometimes artificially high when a
-// newly created instance appears in the driver's Instances() list
-// before the Create() call returns.
 func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
 	wp.setupOnce.Do(wp.setup)
 	wp.mtx.RLock()
 	defer wp.mtx.RUnlock()
-	u := map[arvados.InstanceType]int{}
-	for it, c := range wp.creating {
-		u[it] = c
+	unalloc := map[arvados.InstanceType]int{}
+	creating := map[arvados.InstanceType]int{}
+	for it, times := range wp.creating {
+		creating[it] = len(times)
 	}
 	for _, wkr := range wp.workers {
-		if wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown {
-			u[wkr.instType]++
+		if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) {
+			continue
 		}
+		it := wkr.instType
+		unalloc[it]++
+		if wkr.state == StateUnknown && creating[it] > 0 && wkr.appeared.After(wp.creating[it][0]) {
+			// If up to N new workers appear in
+			// Instances() while we are waiting for N
+			// Create() calls to complete, we assume we're
+			// just seeing a race between Instances() and
+			// Create() responses.
+			//
+			// The other common reason why nodes have
+			// state==Unknown is that they appeared at
+			// startup, before any Create calls. They
+			// don't match the above timing condition, so
+			// we never mistakenly attribute them to
+			// pending Create calls.
+			creating[it]--
+		}
+	}
+	for it, c := range creating {
+		unalloc[it] += c
 	}
-	return u
+	return unalloc
 }
 
 // Create a new instance with the given type, and add it to the worker
@@ -204,13 +220,21 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
 		return wp.atQuotaErr
 	}
 	tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
-	wp.creating[it]++
+	now := time.Now()
+	wp.creating[it] = append(wp.creating[it], now)
 	go func() {
 		defer wp.notify()
 		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
 		wp.mtx.Lock()
 		defer wp.mtx.Unlock()
-		wp.creating[it]--
+		// Remove our timestamp marker from wp.creating
+		for i, t := range wp.creating[it] {
+			if t == now {
+				copy(wp.creating[it][i:], wp.creating[it][i+1:])
+				wp.creating[it] = wp.creating[it][:len(wp.creating[it])-1]
+				break
+			}
+		}
 		if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
 			wp.atQuotaErr = err
 			wp.atQuotaUntil = time.Now().Add(time.Minute)
@@ -266,6 +290,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
 		state:    initialState,
 		instance: inst,
 		instType: it,
+		appeared: now,
 		probed:   now,
 		busy:     now,
 		updated:  now,
@@ -579,7 +604,7 @@ func (wp *Pool) Instances() []InstanceView {
 }
 
 func (wp *Pool) setup() {
-	wp.creating = map[arvados.InstanceType]int{}
+	wp.creating = map[arvados.InstanceType][]time.Time{}
 	wp.exited = map[string]time.Time{}
 	wp.workers = map[cloud.InstanceID]*worker{}
 	wp.subscribers = map[<-chan struct{}]chan<- struct{}{}
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index f8667e06cd..3867e2c63e 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -68,23 +68,16 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
 	pool.Create(type2)
 	c.Check(pool.Unallocated()[type1], check.Equals, 1)
 	c.Check(pool.Unallocated()[type2], check.Equals, 2)
-	// Unblock the pending Create calls and (before calling Sync!)
-	// wait for the pool to process the returned instances.
+
+	// Unblock the pending Create calls.
 	go lameInstanceSet.Release(3)
-	suite.wait(c, pool, notify, func() bool {
-		list, err := lameInstanceSet.Instances(nil)
-		return err == nil && len(list) == 3
-	})
 
-	c.Check(pool.Unallocated()[type1], check.Equals, 1)
-	c.Check(pool.Unallocated()[type2], check.Equals, 2)
-	pool.getInstancesAndSync()
-	// Returned counts can be temporarily higher than 1 and 2 if
-	// poll ran before Create() returned.
-	c.Check(pool.Unallocated()[type1], check.Not(less), 1)
-	c.Check(pool.Unallocated()[type2], check.Not(less), 2)
+	// Wait for each instance to either return from its Create
+	// call, or show up in a poll.
 	suite.wait(c, pool, notify, func() bool {
-		return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 2
+		pool.mtx.RLock()
+		defer pool.mtx.RUnlock()
+		return len(pool.workers) == 3
 	})
 
 	c.Check(pool.Shutdown(type2), check.Equals, true)
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 85104a13aa..a0a61c1597 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -64,6 +64,7 @@ type worker struct {
 	instType  arvados.InstanceType
 	vcpus     int64
 	memory    int64
+	appeared  time.Time
 	probed    time.Time
 	updated   time.Time
 	busy      time.Time
-- 
2.30.2