creating[it] = len(times)
}
for _, wkr := range wp.workers {
- if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) || wkr.idleBehavior != IdleBehaviorRun || len(wkr.running) > 0 {
+ // Skip workers that are not expected to become
+ // available soon. Note len(wkr.running)>0 is not
+ // redundant here: it can be true even in
+ // StateUnknown.
+ if wkr.state == StateShutdown ||
+ wkr.state == StateRunning ||
+ wkr.idleBehavior != IdleBehaviorRun ||
+ len(wkr.running) > 0 {
continue
}
it := wkr.instType
// advantage of the non-busy state, though.
wkr.busy = updateTime
}
- running := map[string]struct{}{}
changed := false
+
+ // Build a new "running" map. Set changed=true if it differs
+ // from the existing map (wkr.running) to ensure the scheduler
+ // gets notified below.
+ running := map[string]struct{}{}
for _, uuid := range ctrUUIDs {
running[uuid] = struct{}{}
if _, ok := wkr.running[uuid]; !ok {
+ if _, ok := wkr.starting[uuid]; !ok {
+ // We didn't start it -- it must have
+ // been started by a previous
+ // dispatcher process.
+ logger.WithField("ContainerUUID", uuid).Info("crunch-run process detected")
+ }
changed = true
}
}
changed = true
}
}
+
+ // Update state if this was the first successful boot-probe.
if booted && (wkr.state == StateUnknown || wkr.state == StateBooting) {
// Note: this will change again below if
// len(wkr.starting)+len(wkr.running) > 0.
wkr.state = StateIdle
changed = true
- } else if wkr.state == StateUnknown && len(running) != len(wkr.running) {
+ }
+
+ // If wkr.state and wkr.running aren't changing then there's
+ // no need to log anything, notify the scheduler, move state
+ // back and forth between idle/running, etc.
+ if !changed {
+ return
+ }
+
+ // Log whenever a run-probe reveals crunch-run processes
+ // appearing/disappearing before boot-probe succeeds.
+ if wkr.state == StateUnknown && len(running) != len(wkr.running) {
logger.WithFields(logrus.Fields{
"RunningContainers": len(running),
"State": wkr.state,
}).Info("crunch-run probe succeeded, but boot probe is still failing")
}
- if !changed {
- return
- }
wkr.running = running
if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
// caller must have lock.
func (wkr *worker) shutdownIfBroken(dur time.Duration) {
if wkr.idleBehavior == IdleBehaviorHold {
+ // Never shut down.
return
}
label, threshold := "", wkr.wp.timeoutProbe
// caller must have lock.
func (wkr *worker) shutdownIfIdle() bool {
if wkr.idleBehavior == IdleBehaviorHold {
- return false
- }
- if !(wkr.state == StateIdle || (wkr.state == StateBooting && wkr.idleBehavior == IdleBehaviorDrain)) {
+ // Never shut down.
return false
}
age := time.Since(wkr.busy)
- if wkr.idleBehavior != IdleBehaviorDrain && age < wkr.wp.timeoutIdle {
+
+ old := age >= wkr.wp.timeoutIdle
+ draining := wkr.idleBehavior == IdleBehaviorDrain
+ shouldShutdown := ((old || draining) && wkr.state == StateIdle) ||
+ (draining && wkr.state == StateBooting)
+ if !shouldShutdown {
return false
}
+
wkr.logger.WithFields(logrus.Fields{
+ "State": wkr.state,
"Age": age,
"IdleBehavior": wkr.idleBehavior,
}).Info("shutdown idle worker")