14977: Don't try fixStaleLocks until worker pool state is loaded.
author Tom Clegg <tclegg@veritasgenetics.com>
Fri, 15 Mar 2019 20:19:51 +0000 (16:19 -0400)
committer Tom Clegg <tclegg@veritasgenetics.com>
Fri, 15 Mar 2019 20:34:14 +0000 (16:34 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

lib/dispatchcloud/worker/pool.go

index e81c2c091f1c37c7b52488b4d919bdb9a9fe4d79..e90935e2aa9e5747d08e136475cd186c0b4bc766 100644 (file)
@@ -419,8 +419,12 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
 }
 
 // CountWorkers returns the current number of workers in each state.
+//
+// CountWorkers blocks, if necessary, until the initial instance list
+// has been loaded from the cloud provider.
 func (wp *Pool) CountWorkers() map[State]int {
        wp.setupOnce.Do(wp.setup)
+       wp.waitUntilLoaded()
        wp.mtx.Lock()
        defer wp.mtx.Unlock()
        r := map[State]int{}
@@ -786,6 +790,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
        }
 
        if !wp.loaded {
+               notify = true
                wp.loaded = true
                wp.logger.WithField("N", len(wp.workers)).Info("loaded initial instance list")
        }
@@ -795,6 +800,17 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
        }
 }
 
+// waitUntilLoaded blocks until wp.loaded is true. It returns
+// immediately if wp.loaded is already true when first checked.
+//
+// The read lock is held only while reading wp.loaded: it is released
+// before blocking on the subscription channel and reacquired
+// afterwards, so the deferred RUnlock always pairs with a held lock
+// when the function returns.
+//
+// NOTE(review): the channel obtained from Subscribe is never
+// released here, even when wp.loaded is already true. If Pool offers
+// a way to unsubscribe, confirm whether it should be called so that
+// repeated calls do not accumulate subscribers.
+func (wp *Pool) waitUntilLoaded() {
+       // Subscribe before checking wp.loaded; presumably this ensures a
+       // notification sent after the check is still delivered (verify
+       // against Subscribe's channel semantics).
+       ch := wp.Subscribe()
+       wp.mtx.RLock()
+       defer wp.mtx.RUnlock()
+       for !wp.loaded {
+               // Drop the lock while waiting, then re-check wp.loaded under
+               // the lock, since a wakeup on ch does not by itself show that
+               // wp.loaded has been set.
+               wp.mtx.RUnlock()
+               <-ch
+               wp.mtx.RLock()
+       }
+}
+
 // Return a random string of n hexadecimal digits (n*4 random bits). n
 // must be even.
 func randomHex(n int) string {