From 71460cf96c9b43c8ab0a38118c3745a4c0e6d7e9 Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Sat, 27 Oct 2018 01:18:36 -0400
Subject: [PATCH] 14360: Move shutdown-if-broken check to its own func.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---
 lib/dispatchcloud/worker/pool.go | 41 +++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a7b5132a5a..4ddd3745ef 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -311,7 +311,7 @@ func (wp *Pool) shutdown(wkr *worker, logger logrus.FieldLogger) {
 	go func() {
 		err := wkr.instance.Destroy()
 		if err != nil {
-			logger.WithError(err).Warn("shutdown failed")
+			logger.WithError(err).WithField("Instance", wkr.instance).Warn("shutdown failed")
 			return
 		}
 		wp.mtx.Lock()
@@ -542,7 +542,7 @@ func (wp *Pool) runProbes() {
 		workers = workers[:0]
 		wp.mtx.Lock()
 		for id, wkr := range wp.workers {
-			if wkr.state == StateShutdown || wp.autoShutdown(wkr) {
+			if wkr.state == StateShutdown || wp.shutdownIfIdle(wkr) {
 				continue
 			}
 			workers = append(workers, id)
@@ -596,7 +596,28 @@ func (wp *Pool) runSync() {
 }
 
 // caller must have lock.
-func (wp *Pool) autoShutdown(wkr *worker) bool {
+func (wp *Pool) shutdownIfBroken(wkr *worker, dur time.Duration) { // shuts wkr down once dur reaches the timeout that applies to its state; NOTE(review): dur is computed by the caller (not visible here), presumably time elapsed since wkr.probed
+	if wkr.state == StateHold { // workers in StateHold are never shut down by this check
+		return
+	}
+	label, threshold := "", wp.timeoutProbe // default: no log prefix, compare against the probe timeout
+	if wkr.state == StateBooting { // booting workers are compared against the boot timeout and logged as "new "
+		label, threshold = "new ", wp.timeoutBooting
+	}
+	if dur < threshold { // NOTE(review): the inline code this replaces tested dur > threshold, so dur == threshold now also triggers shutdown
+		return
+	}
+	wp.logger.WithFields(logrus.Fields{ // pool-level logger, so the worker is identified explicitly via the fields below
+		"Instance": wkr.instance,
+		"Duration": dur,
+		"Since":    wkr.probed,
+		"State":    wkr.state,
+	}).Warnf("%sinstance unresponsive, shutting down", label)
+	wp.shutdown(wkr, wp.logger) // NOTE(review): the replaced inline code passed probeAndUpdate's local logger here, not wp.logger
+}
+
+// shutdownIfIdle replaces autoShutdown (same signature); it returns false right away if wkr has running/starting entries or is not in StateRunning. caller must have lock.
+func (wp *Pool) shutdownIfIdle(wkr *worker) bool {
 	if len(wkr.running)+len(wkr.starting) > 0 || wkr.state != StateRunning {
 		return false
 	}
@@ -762,19 +783,7 @@ func (wp *Pool) probeAndUpdate(wkr *worker) {
 		} else {
 			logger.Info("instance not responding")
 		}
-
-		if wkr.state == StateHold {
-			return
-		}
-
-		label, threshold := "", wp.timeoutProbe
-		if wkr.state == StateBooting {
-			label, threshold = "new ", wp.timeoutBooting
-		}
-		if dur > threshold {
-			logger.WithField("Since", wkr.probed).Warnf("%sinstance unresponsive, shutting down", label)
-			wp.shutdown(wkr, logger)
-		}
+		wp.shutdownIfBroken(wkr, dur)
 		return
 	}
 
-- 
2.30.2