}
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
+ if wkr.starting[ctr.UUID] != rr {
+ // Someone else (e.g., wkr.probeAndUpdate() ->
+ // wkr.updateRunning() or wkr.Close()) already
+ // moved our runner from wkr.starting to
+ // wkr.running or deleted it while we were in
+ // rr.Start().
+ return
+ }
now := time.Now()
wkr.updated = now
wkr.busy = now
// Never shut down.
return false
}
- label, threshold := "", wkr.wp.timeoutProbe
+ prologue, epilogue, threshold := "", "", wkr.wp.timeoutProbe
if wkr.state == StateUnknown || wkr.state == StateBooting {
- label, threshold = "new ", wkr.wp.timeoutBooting
+ prologue = "new "
+ epilogue = " -- `arvados-server cloudtest` might help troubleshoot, see https://doc.arvados.org/main/admin/cloudtest.html"
+ threshold = wkr.wp.timeoutBooting
}
if dur < threshold {
return false
"Duration": dur,
"Since": wkr.probed,
"State": wkr.state,
- }).Warnf("%sinstance unresponsive, shutting down", label)
+ }).Warnf("%sinstance unresponsive, shutting down%s", prologue, epilogue)
wkr.shutdown()
return true
}
for uuid, rr := range wkr.running {
wkr.logger.WithField("ContainerUUID", uuid).Info("crunch-run process abandoned")
rr.Close()
+ delete(wkr.running, uuid)
}
for uuid, rr := range wkr.starting {
wkr.logger.WithField("ContainerUUID", uuid).Info("crunch-run process abandoned")
rr.Close()
+ delete(wkr.starting, uuid)
}
}