From b1df618b0c33eccc4ab15d89aad4a10fbf245993 Mon Sep 17 00:00:00 2001 From: Ward Vandewege Date: Fri, 11 Sep 2020 12:30:07 -0400 Subject: [PATCH] 16636: add instances_time_from_shutdown_request_to_disappearance_seconds metric Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- lib/dispatchcloud/dispatcher_test.go | 2 ++ lib/dispatchcloud/worker/pool.go | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go index 6e1850410b..92a42c7a21 100644 --- a/lib/dispatchcloud/dispatcher_test.go +++ b/lib/dispatchcloud/dispatcher_test.go @@ -221,6 +221,8 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds{quantile="0.95"} [0-9.]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_count [0-9]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_sum [0-9.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_count [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_sum [0-9.]*`) } func (s *DispatcherSuite) TestAPIPermissions(c *check.C) { diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index 086887cb44..75bc01fc0e 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -179,6 +179,7 @@ type Pool struct { mDisappearances *prometheus.CounterVec mTimeToSSH prometheus.Summary mTimeToReadyForContainer prometheus.Summary + mTimeFromShutdownToGone prometheus.Summary } type createCall struct { @@ -661,6 +662,14 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) { Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, }) reg.MustRegister(wp.mTimeToReadyForContainer) + wp.mTimeFromShutdownToGone = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "instances_time_from_shutdown_request_to_disappearance_seconds", + Help: "Number of seconds between the first shutdown attempt and the disappearance of the worker.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }) + reg.MustRegister(wp.mTimeFromShutdownToGone) } func (wp *Pool) runMetrics() { @@ -930,6 +939,9 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) { if wp.mDisappearances != nil { wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc() } + if wp.mTimeFromShutdownToGone != nil { + wp.mTimeFromShutdownToGone.Observe(time.Now().Sub(wkr.destroyed).Seconds()) + } delete(wp.workers, id) go wkr.Close() notify = true -- 2.39.5