20667: Reduce max supervisors if pool reaches cloud quota.
author Tom Clegg <tom@curii.com>
Mon, 26 Jun 2023 15:00:33 +0000 (11:00 -0400)
committer Tom Clegg <tom@curii.com>
Mon, 26 Jun 2023 15:00:33 +0000 (11:00 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/dispatchcloud/scheduler/run_queue.go

index 63ac4a7b32122e68787bad1b71c1c5a988e97440..0c4634d7554a5487a55a0f9c53200e9beb01fabb 100644 (file)
@@ -92,6 +92,19 @@ func (sch *Scheduler) runQueue() {
        if sch.maxInstances > 0 && sch.maxConcurrency > sch.maxInstances {
                sch.maxConcurrency = sch.maxInstances
        }
+       if sch.pool.AtQuota() && len(running) > 0 && (sch.maxConcurrency == 0 || sch.maxConcurrency > len(running)) {
+               // Consider current workload to be the maximum
+               // allowed, for the sake of reporting metrics and
+               // calculating max supervisors.
+               //
+               // Now that sch.maxConcurrency is set, we will only
+               // raise it past len(running) by 10%.  This helps
+               // avoid running an inappropriate number of
+               // supervisors when we reach the cloud-imposed quota
+               // (which may be based on # CPUs etc) long before the
+               // configured MaxInstances.
+               sch.maxConcurrency = len(running)
+       }
        sch.mMaxContainerConcurrency.Set(float64(sch.maxConcurrency))
 
        maxSupervisors := int(float64(sch.maxConcurrency) * sch.supervisorFraction)