-// should be called in a new goroutine
-func (wp *Pool) probeAndUpdate(wkr *worker) {
- logger := wp.logger.WithField("Instance", wkr.instance)
- wp.mtx.Lock()
- updated := wkr.updated
- needProbeRunning := wkr.state == StateRunning
- needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
- wp.mtx.Unlock()
- if !needProbeBooted && !needProbeRunning {
- return
- }
-
- var (
- ctrUUIDs []string
- ok bool
- stderr []byte
- )
- if needProbeBooted {
- ok, stderr = wp.probeBooted(wkr)
- wp.mtx.Lock()
- if ok && (wkr.state == StateUnknown || wkr.state == StateBooting) {
- wkr.state = StateRunning
- wkr.probed = time.Now()
- logger.Info("instance booted")
- go wp.notify()
- }
- needProbeRunning = wkr.state == StateRunning
- wp.mtx.Unlock()
- }
- if needProbeRunning {
- ctrUUIDs, ok, stderr = wp.probeRunning(wkr)
- }
- logger = logger.WithField("stderr", string(stderr))
- wp.mtx.Lock()
- defer wp.mtx.Unlock()
- if !ok {
- if wkr.state == StateShutdown {
- // Skip the logging noise if shutdown was
- // initiated during probe.
- return
- }
- dur := time.Since(wkr.probed)
- logger := logger.WithFields(logrus.Fields{
- "Duration": dur,
- "State": wkr.state,
- })
- if wkr.state == StateBooting {
- logger.Debug("new instance not responding")
- } else {
- logger.Info("instance not responding")
- }
- wp.shutdownIfBroken(wkr, dur)
- return
- }
-
- updateTime := time.Now()
- wkr.probed = updateTime
-
- if updated != wkr.updated {
- // Worker was updated after the probe began, so
- // wkr.running might have a container UUID that was
- // not yet running when ctrUUIDs was generated. Leave
- // wkr.running alone and wait for the next probe to
- // catch up on any changes.
- return
- }
-
- if len(ctrUUIDs) > 0 {
- wkr.busy = updateTime
- wkr.lastUUID = ctrUUIDs[0]
- } else if len(wkr.running) > 0 {
- // Actual last-busy time was sometime between wkr.busy
- // and now. Now is the earliest opportunity to take
- // advantage of the non-busy state, though.
- wkr.busy = updateTime
- }
- running := map[string]struct{}{}
- changed := false
- for _, uuid := range ctrUUIDs {
- running[uuid] = struct{}{}
- if _, ok := wkr.running[uuid]; !ok {
- changed = true
- }
- }
- for uuid := range wkr.running {
- if _, ok := running[uuid]; !ok {
- logger.WithField("ContainerUUID", uuid).Info("crunch-run process ended")
- wp.exited[uuid] = updateTime
- changed = true
- }
- }
- if changed {
- wkr.running = running
- wkr.updated = updateTime
- go wp.notify()