19973: Add metrics for automatic container concurrency limit.
[arvados.git] / lib / dispatchcloud / scheduler / scheduler.go
index 6409ea031a4f02228118bc081891990dfcbe20f9..4644dc4ea4db00782b38589f546f5cb22d577e88 100644 (file)
@@ -11,7 +11,9 @@ import (
        "sync"
        "time"
 
+       "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
 )
 
@@ -29,8 +31,10 @@ import (
 // shuts down idle workers, in case they are consuming quota.
 type Scheduler struct {
        logger              logrus.FieldLogger
+       client              *arvados.Client
        queue               ContainerQueue
        pool                WorkerPool
+       reg                 *prometheus.Registry
        staleLockTimeout    time.Duration
        queueUpdateInterval time.Duration
 
@@ -41,17 +45,28 @@ type Scheduler struct {
        runOnce sync.Once
        stop    chan struct{}
        stopped chan struct{}
+
+       last503time    time.Time // last time API responded 503
+       maxConcurrency int       // dynamic container limit (0 = unlimited), see runQueue()
+
+       mContainersAllocatedNotStarted   prometheus.Gauge
+       mContainersNotAllocatedOverQuota prometheus.Gauge
+       mLongestWaitTimeSinceQueue       prometheus.Gauge
+       mLast503Time                     prometheus.Gauge
+       mMaxContainerConcurrency         prometheus.Gauge
 }
 
 // New returns a new unstarted Scheduler.
 //
 // Any given queue and pool should not be used by more than one
 // scheduler at a time.
-func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
-       return &Scheduler{
+func New(ctx context.Context, client *arvados.Client, queue ContainerQueue, pool WorkerPool, reg *prometheus.Registry, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
+       sch := &Scheduler{
                logger:              ctxlog.FromContext(ctx),
+               client:              client,
                queue:               queue,
                pool:                pool,
+               reg:                 reg,
                staleLockTimeout:    staleLockTimeout,
                queueUpdateInterval: queueUpdateInterval,
                wakeup:              time.NewTimer(time.Second),
@@ -59,6 +74,73 @@ func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTi
                stopped:             make(chan struct{}),
                uuidOp:              map[string]string{},
        }
+       sch.registerMetrics(reg)
+       return sch
+}
+
+// registerMetrics creates the scheduler's prometheus gauges, stores
+// them in the corresponding sch.m* fields, and registers each one
+// with reg.
+//
+// If reg is nil, the gauges are registered with a newly created
+// registry that is local to this call, so the sch.m* fields are
+// always usable even when the caller did not supply a registry.
+//
+// Registration uses MustRegister, so a registration failure results
+// in a panic rather than a returned error.
+func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) {
+       if reg == nil {
+               reg = prometheus.NewRegistry()
+       }
+       // All scheduler metrics share the same namespace and
+       // subsystem; only the name and help text vary.
+       newGauge := func(name, help string) prometheus.Gauge {
+               g := prometheus.NewGauge(prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "dispatchcloud",
+                       Name:      name,
+                       Help:      help,
+               })
+               reg.MustRegister(g)
+               return g
+       }
+       sch.mContainersAllocatedNotStarted = newGauge(
+               "containers_allocated_not_started",
+               "Number of containers allocated to a worker but not started yet (worker is booting).")
+       sch.mContainersNotAllocatedOverQuota = newGauge(
+               "containers_not_allocated_over_quota",
+               "Number of containers not allocated to a worker because the system has hit a quota.")
+       sch.mLongestWaitTimeSinceQueue = newGauge(
+               "containers_longest_wait_time_seconds",
+               "Current longest wait time of any container since queuing, and before the start of crunch-run.")
+       sch.mLast503Time = newGauge(
+               "last_503_time",
+               "Time of most recent 503 error received from API.")
+       sch.mMaxContainerConcurrency = newGauge(
+               "max_concurrent_containers",
+               "Dynamically assigned limit on number of containers scheduled concurrently, set after receiving 503 errors from API.")
+}
+
+// updateMetrics refreshes the longest-wait-time gauge.
+//
+// It looks for the earliest CreatedAt among queue entries that have
+// priority > 0, are in state Queued or Locked, and are not listed in
+// the worker pool's set of running containers. The gauge is set to
+// the number of seconds elapsed since that time, or to 0 if no entry
+// qualifies.
+func (sch *Scheduler) updateMetrics() {
+       var oldest time.Time
+       queued, _ := sch.queue.Entries()
+       active := sch.pool.Running()
+       for _, item := range queued {
+               if item.Container.Priority <= 0 {
+                       continue
+               }
+               switch item.Container.State {
+               case arvados.ContainerStateQueued, arvados.ContainerStateLocked:
+                       // still waiting; consider it below
+               default:
+                       continue
+               }
+               // Skip containers that are already on a worker and
+               // preparing to run the payload (i.e., state is Locked
+               // and the container is in the running set, most
+               // likely loading the payload image).
+               if _, onWorker := active[item.Container.UUID]; onWorker {
+                       continue
+               }
+               if oldest.IsZero() || item.Container.CreatedAt.Before(oldest) {
+                       oldest = item.Container.CreatedAt
+               }
+       }
+       waitSeconds := 0.0
+       if !oldest.IsZero() {
+               waitSeconds = time.Since(oldest).Seconds()
+       }
+       sch.mLongestWaitTimeSinceQueue.Set(waitSeconds)
 }
 
 // Start starts the scheduler.
@@ -113,6 +195,7 @@ func (sch *Scheduler) run() {
        for {
                sch.runQueue()
                sch.sync()
+               sch.updateMetrics()
                select {
                case <-sch.stop:
                        return