X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d371545cd0f62b189e19b747d78ddc1d713510f9..7af506a0e9712ca22096ebd56df8867a427dae96:/lib/dispatchcloud/scheduler/scheduler.go diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go index 892f2f3ca3..ee7ab50883 100644 --- a/lib/dispatchcloud/scheduler/scheduler.go +++ b/lib/dispatchcloud/scheduler/scheduler.go @@ -46,9 +46,11 @@ type Scheduler struct { stop chan struct{} stopped chan struct{} - last503time time.Time // last time API responded 503 - maxConcurrency int // dynamic container limit (0 = unlimited), see runQueue() - maxSupervisors int // maximum number of "supervisor" containers (these are containers who's main job is to launch other containers, e.g. workflow runners) + last503time time.Time // last time API responded 503 + maxConcurrency int // dynamic container limit (0 = unlimited), see runQueue() + supervisorFraction float64 // maximum fraction of "supervisor" containers (these are containers who's main job is to launch other containers, e.g. workflow runners) + maxInstances int // maximum number of instances the pool will bring up (0 = unlimited) + instancesWithinQuota int // max concurrency achieved since last quota error (0 = no quota error yet) mContainersAllocatedNotStarted prometheus.Gauge mContainersNotAllocatedOverQuota prometheus.Gauge @@ -61,7 +63,7 @@ type Scheduler struct { // // Any given queue and pool should not be used by more than one // scheduler at a time. -func New(ctx context.Context, client *arvados.Client, queue ContainerQueue, pool WorkerPool, reg *prometheus.Registry, staleLockTimeout, queueUpdateInterval time.Duration, maxSupervisors int) *Scheduler { +func New(ctx context.Context, client *arvados.Client, queue ContainerQueue, pool WorkerPool, reg *prometheus.Registry, staleLockTimeout, queueUpdateInterval time.Duration, minQuota, maxInstances int, supervisorFraction float64) *Scheduler { sch := &Scheduler{ logger: ctxlog.FromContext(ctx), client: client, @@ -74,7 +76,13 @@ func New(ctx context.Context, client *arvados.Client, queue ContainerQueue, pool stop: make(chan struct{}), stopped: make(chan struct{}), uuidOp: map[string]string{}, - maxSupervisors: maxSupervisors, + supervisorFraction: supervisorFraction, + maxInstances: maxInstances, + } + if minQuota > 0 { + sch.maxConcurrency = minQuota + } else { + sch.maxConcurrency = maxInstances } sch.registerMetrics(reg) return sch @@ -119,6 +127,18 @@ func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) { Help: "Dynamically assigned limit on number of containers scheduled concurrency, set after receiving 503 errors from API.", }) reg.MustRegister(sch.mMaxContainerConcurrency) + reg.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "at_quota", + Help: "Flag indicating the cloud driver is reporting an at-quota condition.", + }, func() float64 { + if sch.pool.AtQuota() { + return 1 + } else { + return 0 + } + })) } func (sch *Scheduler) updateMetrics() {