StateShutdown: "shutdown",
}
-// BootOutcome is the result of a worker boot. It is used as a label in a metric.
-type BootOutcome string
-
-const (
- BootOutcomeFailed BootOutcome = "failure"
- BootOutcomeSucceeded BootOutcome = "success"
- BootOutcomeIdleShutdown BootOutcome = "idle shutdown"
- BootOutcomeDisappeared BootOutcome = "disappeared"
-)
-
-var validBootOutcomes = map[BootOutcome]bool{
- BootOutcomeFailed: true,
- BootOutcomeSucceeded: true,
- BootOutcomeIdleShutdown: true,
- BootOutcomeDisappeared: true,
-}
-
-func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
- if wkr.bootOutcomeReported {
- return
- }
- if wkr.wp.mBootOutcomes != nil {
- wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
- }
- wkr.bootOutcomeReported = true
-}
-
// String implements fmt.Stringer.
func (s State) String() string {
return stateString[s]
return []byte(stateString[s]), nil
}
+// BootOutcome is the result of a worker boot. It is used as a label in a metric.
+type BootOutcome string
+
+const (
+ BootOutcomeFailed BootOutcome = "failure"
+ BootOutcomeSucceeded BootOutcome = "success"
+ BootOutcomeAborted BootOutcome = "aborted"
+ BootOutcomeDisappeared BootOutcome = "disappeared"
+)
+
+var validBootOutcomes = map[BootOutcome]bool{
+ BootOutcomeFailed: true,
+ BootOutcomeSucceeded: true,
+ BootOutcomeAborted: true,
+ BootOutcomeDisappeared: true,
+}
+
// IdleBehavior indicates the behavior desired when a node becomes idle.
type IdleBehavior string
updated time.Time
busy time.Time
destroyed time.Time
+ firstSSHConnection time.Time
lastUUID string
running map[string]*remoteRunner // remember to update state idle<->running when this changes
starting map[string]*remoteRunner // remember to update state idle<->running when this changes
probing chan struct{}
bootOutcomeReported bool
+ timeToReadyReported bool
}
func (wkr *worker) onUnkillable(uuid string) {
go wkr.wp.notify()
}
+// caller must have lock.
+func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
+ if wkr.bootOutcomeReported {
+ return
+ }
+ if wkr.wp.mBootOutcomes != nil {
+ wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
+ }
+ wkr.bootOutcomeReported = true
+}
+
+// caller must have lock.
+func (wkr *worker) reportTimeBetweenFirstSSHAndReadyForContainer() {
+ if wkr.timeToReadyReported {
+ return
+ }
+ if wkr.wp.mTimeToSSH != nil {
+ wkr.wp.mTimeToReadyForContainer.Observe(time.Since(wkr.firstSSHConnection).Seconds())
+ }
+ wkr.timeToReadyReported = true
+}
+
// caller must have lock.
func (wkr *worker) setIdleBehavior(idleBehavior IdleBehavior) {
wkr.logger.WithField("IdleBehavior", idleBehavior).Info("set idle behavior")
// Update state if this was the first successful boot-probe.
if booted && (wkr.state == StateUnknown || wkr.state == StateBooting) {
+ if wkr.state == StateBooting {
+ wkr.reportTimeBetweenFirstSSHAndReadyForContainer()
+ }
// Note: this will change again below if
// len(wkr.starting)+len(wkr.running) > 0.
wkr.state = StateIdle
"IdleDuration": stats.Duration(time.Since(wkr.busy)),
"IdleBehavior": wkr.idleBehavior,
}).Info("shutdown worker")
- wkr.reportBootOutcome(BootOutcomeIdleShutdown)
+ wkr.reportBootOutcome(BootOutcomeAborted)
wkr.shutdown()
return true
}