19973: Add metrics for automatic container concurrency limit.

author Tom Clegg <tom@curii.com>

Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)

committer Tom Clegg <tom@curii.com>

Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)
author Tom Clegg <tom@curii.com>
Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)
committer Tom Clegg <tom@curii.com>
Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go

index cfd95e94549745140c619f26b9f914aa656c969d..057ff8d6e29505fa9319071d1f135a939ef0a83e 100644 (file)
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -62,6 +62,8 @@ func (sch *Scheduler) runQueue() {
                         sch.maxConcurrency = max
                 }
         }
+       sch.mLast503Time.Set(float64(sch.last503time.Unix()))
+       sch.mMaxContainerConcurrency.Set(float64(sch.maxConcurrency))
  
         sch.logger.WithFields(logrus.Fields{
                 "Containers":     len(sorted),
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go

index 589aa3ec1140774a5446fdcf0331cb5d781734f7..4644dc4ea4db00782b38589f546f5cb22d577e88 100644 (file)
--- a/lib/dispatchcloud/scheduler/scheduler.go
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -52,6 +52,8 @@ type Scheduler struct {
         mContainersAllocatedNotStarted   prometheus.Gauge
         mContainersNotAllocatedOverQuota prometheus.Gauge
         mLongestWaitTimeSinceQueue       prometheus.Gauge
+       mLast503Time                     prometheus.Gauge
+       mMaxContainerConcurrency         prometheus.Gauge
  }
  
  // New returns a new unstarted Scheduler.
@@ -101,6 +103,20 @@ func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) {
                 Help:      "Current longest wait time of any container since queuing, and before the start of crunch-run.",
         })
         reg.MustRegister(sch.mLongestWaitTimeSinceQueue)
+       sch.mLast503Time = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "last_503_time",
+               Help:      "Time of most recent 503 error received from API.",
+       })
+       reg.MustRegister(sch.mLast503Time)
+       sch.mMaxContainerConcurrency = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "max_concurrent_containers",
+               Help:      "Dynamically assigned limit on number of containers scheduled concurrently, set after receiving 503 errors from API.",
+       })
+       reg.MustRegister(sch.mMaxContainerConcurrency)
  }
  
  func (sch *Scheduler) updateMetrics() {
author	Tom Clegg <tom@curii.com>
	Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)
committer	Tom Clegg <tom@curii.com>
	Fri, 10 Feb 2023 15:54:49 +0000 (10:54 -0500)
lib/dispatchcloud/scheduler/run_queue.go		patch \| blob \| history
lib/dispatchcloud/scheduler/scheduler.go		patch \| blob \| history