c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_sum [0-9.]*`)
c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_count [0-9]*`)
c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_sum [0-9e+.]*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_count{outcome="success"} [0-9]*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_sum{outcome="success"} [0-9e+.]*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_count{outcome="fail"} [0-9]*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_sum{outcome="fail"} [0-9e+.]*`)
}
func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
mTimeToReadyForContainer prometheus.Summary
mTimeFromShutdownToGone prometheus.Summary
mTimeFromQueueToCrunchRun prometheus.Summary
+ mRunProbeDuration *prometheus.SummaryVec
}
type createCall struct {
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
})
reg.MustRegister(wp.mTimeFromQueueToCrunchRun)
+ wp.mRunProbeDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "instances_run_probe_duration_seconds",
+ Help: "Number of seconds per runProbe call.",
+ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
+ }, []string{"outcome"})
+ reg.MustRegister(wp.mRunProbeDuration)
}
func (wp *Pool) runMetrics() {
}
// ProbeAndUpdate conducts appropriate boot/running probes (if any)
-// for the worker's curent state. If a previous probe is still
+// for the worker's current state. If a previous probe is still
// running, it does nothing.
//
// It should be called in a new goroutine.
if u := wkr.instance.RemoteUser(); u != "root" {
cmd = "sudo " + cmd
}
+ before := time.Now()
stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
wkr.logger.WithFields(logrus.Fields{
"stdout": string(stdout),
"stderr": string(stderr),
}).WithError(err).Warn("probe failed")
+ wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds())
return
}
+ wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds())
ok = true
staleRunLock := false
"git.arvados.org/arvados.git/lib/dispatchcloud/test"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/prometheus/client_golang/prometheus"
check "gopkg.in/check.v1"
)
runnerData: trial.deployRunner,
runnerMD5: md5.Sum(trial.deployRunner),
}
+ wp.registerMetrics(prometheus.NewRegistry())
if trial.deployRunner != nil {
svHash := md5.Sum(trial.deployRunner)
wp.runnerCmd = fmt.Sprintf("/var/run/arvados/crunch-run~%x", svHash)