14360: Avoid overreporting instances during Create/List race.
authorTom Clegg <tclegg@veritasgenetics.com>
Tue, 18 Dec 2018 04:37:47 +0000 (23:37 -0500)
committerTom Clegg <tclegg@veritasgenetics.com>
Tue, 18 Dec 2018 04:37:47 +0000 (23:37 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

lib/dispatchcloud/worker/pool.go
lib/dispatchcloud/worker/pool_test.go
lib/dispatchcloud/worker/worker.go

index a43b96ed82f6f0f05628bdc87fef9518db2cc78a..722d4e918c18061f2c2e41234ac87b6a1e6b4041 100644 (file)
@@ -121,7 +121,7 @@ type Pool struct {
 
        // private state
        subscribers  map[<-chan struct{}]chan<- struct{}
-       creating     map[arvados.InstanceType]int // goroutines waiting for (InstanceSet)Create to return
+       creating     map[arvados.InstanceType][]time.Time // start times of unfinished (InstanceSet)Create calls
        workers      map[cloud.InstanceID]*worker
        loaded       bool                 // loaded list of instances from InstanceSet at least once
        exited       map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
@@ -171,25 +171,41 @@ func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
 
 // Unallocated returns the number of unallocated (creating + booting +
 // idle + unknown) workers for each instance type.
-//
-// The returned counts should be interpreted as upper bounds, rather
-// than exact counts: they are sometimes artificially high when a
-// newly created instance appears in the driver's Instances() list
-// before the Create() call returns.
 func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
        wp.setupOnce.Do(wp.setup)
        wp.mtx.RLock()
        defer wp.mtx.RUnlock()
-       u := map[arvados.InstanceType]int{}
-       for it, c := range wp.creating {
-               u[it] = c
+       unalloc := map[arvados.InstanceType]int{}
+       creating := map[arvados.InstanceType]int{}
+       for it, times := range wp.creating {
+               creating[it] = len(times)
        }
        for _, wkr := range wp.workers {
-               if wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown {
-                       u[wkr.instType]++
+               if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) {
+                       continue
                }
+               it := wkr.instType
+               unalloc[it]++
+               if wkr.state == StateUnknown && creating[it] > 0 && wkr.appeared.After(wp.creating[it][0]) {
+                       // If up to N new workers appear in
+                       // Instances() while we are waiting for N
+                       // Create() calls to complete, we assume we're
+                       // just seeing a race between Instances() and
+                       // Create() responses.
+                       //
+                       // The other common reason why nodes have
+                       // state==Unknown is that they appeared at
+                       // startup, before any Create calls. They
+                       // don't match the above timing condition, so
+                       // we never mistakenly attribute them to
+                       // pending Create calls.
+                       creating[it]--
+               }
+       }
+       for it, c := range creating {
+               unalloc[it] += c
        }
-       return u
+       return unalloc
 }
 
 // Create a new instance with the given type, and add it to the worker
@@ -204,13 +220,21 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
                return wp.atQuotaErr
        }
        tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
-       wp.creating[it]++
+       now := time.Now()
+       wp.creating[it] = append(wp.creating[it], now)
        go func() {
                defer wp.notify()
                inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
                wp.mtx.Lock()
                defer wp.mtx.Unlock()
-               wp.creating[it]--
+               // Remove our timestamp marker from wp.creating
+               for i, t := range wp.creating[it] {
+                       if t == now {
+                               copy(wp.creating[it][i:], wp.creating[it][i+1:])
+                               wp.creating[it] = wp.creating[it][:len(wp.creating[it])-1]
+                               break
+                       }
+               }
                if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
                        wp.atQuotaErr = err
                        wp.atQuotaUntil = time.Now().Add(time.Minute)
@@ -266,6 +290,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
                state:    initialState,
                instance: inst,
                instType: it,
+               appeared: now,
                probed:   now,
                busy:     now,
                updated:  now,
@@ -579,7 +604,7 @@ func (wp *Pool) Instances() []InstanceView {
 }
 
 func (wp *Pool) setup() {
-       wp.creating = map[arvados.InstanceType]int{}
+       wp.creating = map[arvados.InstanceType][]time.Time{}
        wp.exited = map[string]time.Time{}
        wp.workers = map[cloud.InstanceID]*worker{}
        wp.subscribers = map[<-chan struct{}]chan<- struct{}{}
index f8667e06cdb3917fcbdee4d563833367eb599545..3867e2c63e4b41b752b96cdbdf1c9a6b86b94159 100644 (file)
@@ -68,23 +68,16 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
        pool.Create(type2)
        c.Check(pool.Unallocated()[type1], check.Equals, 1)
        c.Check(pool.Unallocated()[type2], check.Equals, 2)
-       // Unblock the pending Create calls and (before calling Sync!)
-       // wait for the pool to process the returned instances.
+
+       // Unblock the pending Create calls.
        go lameInstanceSet.Release(3)
-       suite.wait(c, pool, notify, func() bool {
-               list, err := lameInstanceSet.Instances(nil)
-               return err == nil && len(list) == 3
-       })
 
-       c.Check(pool.Unallocated()[type1], check.Equals, 1)
-       c.Check(pool.Unallocated()[type2], check.Equals, 2)
-       pool.getInstancesAndSync()
-       // Returned counts can be temporarily higher than 1 and 2 if
-       // poll ran before Create() returned.
-       c.Check(pool.Unallocated()[type1], check.Not(less), 1)
-       c.Check(pool.Unallocated()[type2], check.Not(less), 2)
+       // Wait for each instance to either return from its Create
+       // call, or show up in a poll.
        suite.wait(c, pool, notify, func() bool {
-               return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 2
+               pool.mtx.RLock()
+               defer pool.mtx.RUnlock()
+               return len(pool.workers) == 3
        })
 
        c.Check(pool.Shutdown(type2), check.Equals, true)
index 85104a13aa9aa745fbdfe7a6a0e32b0eb29219b8..a0a61c1597198171a54b0de373cc84761afacc58 100644 (file)
@@ -64,6 +64,7 @@ type worker struct {
        instType  arvados.InstanceType
        vcpus     int64
        memory    int64
+       appeared  time.Time
        probed    time.Time
        updated   time.Time
        busy      time.Time