Merge branch '20520-instance-init-command'
author Tom Clegg <tom@curii.com>
Fri, 26 May 2023 19:30:25 +0000 (15:30 -0400)
committer Tom Clegg <tom@curii.com>
Fri, 26 May 2023 19:30:25 +0000 (15:30 -0400)
refs #20520

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

doc/admin/metrics.html.textile.liquid
lib/controller/integration_test.go
lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/run_queue_test.go
sdk/go/arvados/client.go
sdk/go/arvados/limiter.go
sdk/go/arvados/limiter_test.go
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls

diff --git a/doc/admin/metrics.html.textile.liquid b/doc/admin/metrics.html.textile.liquid
index b140bcc1badda0c2996725bf62a026345c0646c6..79d895ae48df1bf53d8ec2c0e1c6eafc3f64f237 100644
--- a/doc/admin/metrics.html.textile.liquid
+++ b/doc/admin/metrics.html.textile.liquid
@@ -31,7 +31,7 @@ When configuring Prometheus, use a @bearer_token@ or @bearer_token_file@ option
 
 table(table table-bordered table-condensed table-hover).
 |_. Component|_. Metrics endpoint|
-|arvados-api-server||
+|arvados-api-server||
 |arvados-controller|✓|
 |arvados-dispatch-cloud|✓|
 |arvados-dispatch-lsf|✓|
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 12fc50089df0cc2cb9b926e4f78acccfc936321a..0557aa3fddde824ea6f3cd1f7c1f8e09a974da2b 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -501,6 +501,7 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithFedToken(c *check.C) {
        req.Header.Set("Authorization", "OAuth2 "+ac2.AuthToken)
        resp, err = arvados.InsecureHTTPClient.Do(req)
        c.Assert(err, check.IsNil)
+       defer resp.Body.Close()
        err = json.NewDecoder(resp.Body).Decode(&cr)
        c.Check(err, check.IsNil)
        c.Check(cr.UUID, check.Matches, "z2222-.*")
@@ -538,8 +539,10 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithBadToken(c *check.C) {
                c.Assert(err, check.IsNil)
                req.Header.Set("Content-Type", "application/json")
                resp, err := ac1.Do(req)
-               c.Assert(err, check.IsNil)
-               c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+               if c.Check(err, check.IsNil) {
+                       c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+                       resp.Body.Close()
+               }
        }
 }
 
@@ -607,9 +610,11 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
                        var jresp httpserver.ErrorResponse
                        err := json.NewDecoder(resp.Body).Decode(&jresp)
                        c.Check(err, check.IsNil)
-                       c.Assert(jresp.Errors, check.HasLen, 1)
-                       c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       if c.Check(jresp.Errors, check.HasLen, 1) {
+                               c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       }
                }
+               resp.Body.Close()
        }
 }
 
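A note on the pattern behind these test fixes: check the request error before dereferencing the response, and always close the response body so the HTTP client can reuse the connection. A minimal standalone sketch follows (the function name and URL are illustrative, not part of the Arvados test suite):

package main

import (
        "fmt"
        "io"
        "net/http"
)

// fetchStatus shows the hygiene applied above: resp is only used after err is
// known to be nil, and resp.Body is always closed.
func fetchStatus(url string) (int, error) {
        resp, err := http.Get(url)
        if err != nil {
                return 0, err // resp may be nil here; never touch it before this check
        }
        defer resp.Body.Close()
        if _, err := io.Copy(io.Discard, resp.Body); err != nil {
                return 0, err
        }
        return resp.StatusCode, nil
}

func main() {
        code, err := fetchStatus("https://example.com/")
        fmt.Println(code, err)
}
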
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 8f4c2e083da584a869c7b8c34bb8d69434ad1584..dcb348878dcb5404eac881e6a3301c61a58e4ee1 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -16,12 +16,25 @@ import (
 var quietAfter503 = time.Minute
 
 func (sch *Scheduler) runQueue() {
+       running := sch.pool.Running()
+       unalloc := sch.pool.Unallocated()
+
        unsorted, _ := sch.queue.Entries()
        sorted := make([]container.QueueEnt, 0, len(unsorted))
        for _, ent := range unsorted {
                sorted = append(sorted, ent)
        }
        sort.Slice(sorted, func(i, j int) bool {
+               _, irunning := running[sorted[i].Container.UUID]
+               _, jrunning := running[sorted[j].Container.UUID]
+               if irunning != jrunning {
+                       // Ensure the "tryrun" loop (see below) sees
+                       // already-scheduled containers first, to
+                       // ensure existing supervisor containers are
+                       // properly counted before we decide whether
+                       // we have room for new ones.
+                       return irunning
+               }
                ilocked := sorted[i].Container.State == arvados.ContainerStateLocked
                jlocked := sorted[j].Container.State == arvados.ContainerStateLocked
                if ilocked != jlocked {
@@ -46,9 +59,6 @@ func (sch *Scheduler) runQueue() {
                }
        })
 
-       running := sch.pool.Running()
-       unalloc := sch.pool.Unallocated()
-
        if t := sch.client.Last503(); t.After(sch.last503time) {
                // API has sent an HTTP 503 response since last time
                // we checked. Use current #containers - 1 as
@@ -120,8 +130,7 @@ tryrun:
                case arvados.ContainerStateQueued:
                        if sch.maxConcurrency > 0 && trying >= sch.maxConcurrency {
                                logger.Tracef("not locking: already at maxConcurrency %d", sch.maxConcurrency)
-                               overquota = sorted[i:]
-                               break tryrun
+                               continue
                        }
                        trying++
                        if unalloc[it] < 1 && sch.pool.AtQuota() {
@@ -137,9 +146,8 @@ tryrun:
                        unalloc[it]--
                case arvados.ContainerStateLocked:
                        if sch.maxConcurrency > 0 && trying >= sch.maxConcurrency {
-                               logger.Debugf("not starting: already at maxConcurrency %d", sch.maxConcurrency)
-                               overquota = sorted[i:]
-                               break tryrun
+                               logger.Tracef("not starting: already at maxConcurrency %d", sch.maxConcurrency)
+                               continue
                        }
                        trying++
                        if unalloc[it] > 0 {
@@ -206,6 +214,8 @@ tryrun:
                                }
                        }
                }
+       }
+       if len(overquota) > 0 {
                // Shut down idle workers that didn't get any
                // containers mapped onto them before we hit quota.
                for it, n := range unalloc {
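
The comparator change above is easier to see in isolation. The sketch below is illustrative only (the types and names are simplified stand-ins, not the scheduler's real ones): containers already scheduled on an instance sort first, then Locked containers, then higher priority, with UUID as the first-come-first-served tie-break the pre-existing comparator already used.

package main

import (
        "fmt"
        "sort"
)

type ent struct {
        UUID     string
        Locked   bool
        Priority int64
}

func main() {
        // ctr-3 is already running on an instance, so it must be considered
        // first even though ctr-2 has higher priority.
        running := map[string]bool{"ctr-3": true}
        queue := []ent{
                {UUID: "ctr-1", Locked: true, Priority: 500},
                {UUID: "ctr-2", Locked: false, Priority: 900},
                {UUID: "ctr-3", Locked: true, Priority: 100},
        }
        sort.Slice(queue, func(i, j int) bool {
                if irun, jrun := running[queue[i].UUID], running[queue[j].UUID]; irun != jrun {
                        return irun // key 1: already-scheduled containers first
                }
                if queue[i].Locked != queue[j].Locked {
                        return queue[i].Locked // key 2: Locked before Queued
                }
                if queue[i].Priority != queue[j].Priority {
                        return queue[i].Priority > queue[j].Priority // key 3: higher priority first
                }
                return queue[i].UUID < queue[j].UUID // key 4: FCFS tie-break
        })
        for _, e := range queue {
                fmt.Println(e.UUID) // prints ctr-3, ctr-1, ctr-2 in that order
        }
}
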
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 3278c7de69333926dfea8363b3f41626be294a21..73602f810971d6c109679b206f9b5bedbcb38945 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -278,6 +278,214 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
        }
 }
 
+// Don't unlock containers or shut down unalloc (booting/idle) nodes
+// just because some 503 errors caused us to reduce maxConcurrency
+// below the current load level.
+//
+// We expect to raise maxConcurrency soon when we stop seeing 503s. If
+// that doesn't happen soon, the idle timeout will take care of the
+// excess nodes.
+func (*SchedulerSuite) TestIdleIn503QuietPeriod(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+               Containers: []arvados.Container{
+                       // scheduled on an instance (but not Running yet)
+                       {
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       // not yet scheduled
+                       {
+                               UUID:     test.ContainerUUID(2),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       // scheduled on an instance (but not Running yet)
+                       {
+                               UUID:     test.ContainerUUID(3),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+                       // not yet scheduled
+                       {
+                               UUID:     test.ContainerUUID(4),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+                       // not yet locked
+                       {
+                               UUID:     test.ContainerUUID(5),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateQueued,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+                       test.InstanceType(3): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+                       test.InstanceType(3): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(1): {},
+                       test.ContainerUUID(3): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 0)
+       sch.last503time = time.Now()
+       sch.maxConcurrency = 3
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.HasLen, 0)
+}
+
+// If we somehow have more supervisor containers in Locked state than
+// we should (e.g., config changed since they started), and some
+// appropriate-sized instances booting up, unlock the excess
+// supervisor containers, but let the instances keep booting.
+func (*SchedulerSuite) TestUnlockExcessSupervisors(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+       }
+       for i := 1; i <= 6; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i),
+                       Priority: int64(1000 - i),
+                       State:    arvados.ContainerStateLocked,
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               VCPUs: 2,
+                               RAM:   2 << 30,
+                       },
+                       SchedulingParameters: arvados.SchedulingParameters{
+                               Supervisor: true,
+                       },
+               })
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(1): {},
+                       test.ContainerUUID(2): {},
+                       test.ContainerUUID(3): {},
+                       test.ContainerUUID(4): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 4)
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.DeepEquals, []string{})
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.DeepEquals, []test.QueueStateChange{
+               {UUID: test.ContainerUUID(5), From: "Locked", To: "Queued"},
+               {UUID: test.ContainerUUID(6), From: "Locked", To: "Queued"},
+       })
+}
+
+// Assuming we're not at quota, don't try to shut down idle nodes
+// merely because we have more queued/locked supervisor containers
+// than MaxSupervisors -- it won't help.
+func (*SchedulerSuite) TestExcessSupervisors(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+       }
+       for i := 1; i <= 8; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i),
+                       Priority: int64(1000 + i),
+                       State:    arvados.ContainerStateQueued,
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               VCPUs: 2,
+                               RAM:   2 << 30,
+                       },
+                       SchedulingParameters: arvados.SchedulingParameters{
+                               Supervisor: true,
+                       },
+               })
+       }
+       for i := 2; i < 4; i++ {
+               queue.Containers[i].State = arvados.ContainerStateLocked
+       }
+       for i := 4; i < 6; i++ {
+               queue.Containers[i].State = arvados.ContainerStateRunning
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(5): {},
+                       test.ContainerUUID(6): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 4)
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.HasLen, 2)
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.HasLen, 0)
+}
+
 // Don't flap lock/unlock when equal-priority containers compete for
 // limited workers.
 //
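
These new scheduler cases use gopkg.in/check.v1 like the rest of the suite, so (assuming a standard Arvados development environment) an individual case can be run with gocheck's filter flag, for example:

go test ./lib/dispatchcloud/scheduler -check.f TestIdleIn503QuietPeriod
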
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index d1a87e30e8c33408d0b9c51d18fc6ddc28466aac..6316d1bedaceb35bc7d3f5578aff3425078aabb7 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -142,11 +142,12 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
                }
        }
        return &Client{
-               Client:   hc,
-               Scheme:   ctrlURL.Scheme,
-               APIHost:  ctrlURL.Host,
-               Insecure: cluster.TLS.Insecure,
-               Timeout:  5 * time.Minute,
+               Client:         hc,
+               Scheme:         ctrlURL.Scheme,
+               APIHost:        ctrlURL.Host,
+               Insecure:       cluster.TLS.Insecure,
+               Timeout:        5 * time.Minute,
+               requestLimiter: requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
        }, nil
 }
 
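For a concrete sense of the new ceiling (the figure is an assumed example, not taken from this diff): with API.MaxConcurrentRequests set to 64, the client returned by NewClientFromConfig caps its own requestLimiter at 64/4 = 16 concurrent outstanding requests, so a single internal client cannot consume more than a quarter of the controller's configured capacity.
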
diff --git a/sdk/go/arvados/limiter.go b/sdk/go/arvados/limiter.go
index f62264c636f96dfa4b55ff0d581fc8cd50b05c09..dc944160ab2dd5d459a31fa2386ebe9a5f6bb2c3 100644
--- a/sdk/go/arvados/limiter.go
+++ b/sdk/go/arvados/limiter.go
@@ -13,11 +13,15 @@ import (
        "time"
 )
 
-var requestLimiterQuietPeriod = time.Second
+var (
+       requestLimiterQuietPeriod        = time.Second
+       requestLimiterInitialLimit int64 = 8
+)
 
 type requestLimiter struct {
        current    int64
        limit      int64
+       maxlimit   int64
        lock       sync.Mutex
        cond       *sync.Cond
        quietUntil time.Time
@@ -33,6 +37,7 @@ func (rl *requestLimiter) Acquire(ctx context.Context) {
        if rl.cond == nil {
                // First use of requestLimiter. Initialize.
                rl.cond = sync.NewCond(&rl.lock)
+               rl.limit = requestLimiterInitialLimit
        }
        // Wait out the quiet period(s) immediately following a 503.
        for ctx.Err() == nil {
@@ -137,9 +142,12 @@ func (rl *requestLimiter) Report(resp *http.Response, err error) bool {
                        increase = 1
                }
                rl.limit += increase
-               if max := rl.current * 2; max > rl.limit {
+               if max := rl.current * 2; max < rl.limit {
                        rl.limit = max
                }
+               if rl.maxlimit > 0 && rl.maxlimit < rl.limit {
+                       rl.limit = rl.maxlimit
+               }
                rl.cond.Broadcast()
        }
        return false
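
Taken together, the limiter changes mean the client-side limit starts at 8 (instead of being effectively unbounded before the first Report), never exceeds the maxlimit derived from the cluster configuration, and on success grows but is clamped to twice the number of requests actually in flight. The standalone model below is illustrative only; the names echo the real fields, and the halve-on-503 rule is an assumption consistent with the updated limiter_test.go expectation further down.

package main

import "fmt"

// limiterModel mimics the sizing rules visible in this diff; it is not the
// production requestLimiter.
type limiterModel struct {
        current, limit, maxlimit int64
}

func (m *limiterModel) onSuccess(increase int64) {
        if increase < 1 {
                increase = 1
        }
        m.limit += increase
        // Never let the limit run more than 2x ahead of actual concurrency...
        if max := m.current * 2; max < m.limit {
                m.limit = max
        }
        // ...and never past the configured ceiling (MaxConcurrentRequests/4).
        if m.maxlimit > 0 && m.maxlimit < m.limit {
                m.limit = m.maxlimit
        }
}

func (m *limiterModel) on503() {
        // Assumed rule: halve the limit after a 503, which matches the test's
        // expectation of requestLimiterInitialLimit/2.
        m.limit /= 2
        if m.limit < 1 {
                m.limit = 1
        }
}

func main() {
        m := limiterModel{current: 5, limit: 8, maxlimit: 16} // limit starts at 8
        m.on503()
        fmt.Println(m.limit) // 4

        m.current = 4
        for i := 0; i < 10; i++ {
                m.onSuccess(1)
        }
        fmt.Println(m.limit) // 8: clamped to current*2, still below maxlimit
}
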
diff --git a/sdk/go/arvados/limiter_test.go b/sdk/go/arvados/limiter_test.go
index d32ab9699999d97a7dad2c63c4332e937419f739..1e73b1c28f44555da1be00ca926ccc7e9c0f7946 100644
--- a/sdk/go/arvados/limiter_test.go
+++ b/sdk/go/arvados/limiter_test.go
@@ -18,23 +18,23 @@ var _ = Suite(&limiterSuite{})
 
 type limiterSuite struct{}
 
-func (*limiterSuite) TestUnlimitedBeforeFirstReport(c *C) {
+func (*limiterSuite) TestInitialLimit(c *C) {
        ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute))
        defer cancel()
        rl := requestLimiter{}
 
        var wg sync.WaitGroup
-       wg.Add(1000)
-       for i := 0; i < 1000; i++ {
+       wg.Add(int(requestLimiterInitialLimit))
+       for i := int64(0); i < requestLimiterInitialLimit; i++ {
                go func() {
                        rl.Acquire(ctx)
                        wg.Done()
                }()
        }
        wg.Wait()
-       c.Check(rl.current, Equals, int64(1000))
-       wg.Add(1000)
-       for i := 0; i < 1000; i++ {
+       c.Check(rl.current, Equals, requestLimiterInitialLimit)
+       wg.Add(int(requestLimiterInitialLimit))
+       for i := int64(0); i < requestLimiterInitialLimit; i++ {
                go func() {
                        rl.Release()
                        wg.Done()
@@ -49,8 +49,8 @@ func (*limiterSuite) TestCancelWhileWaitingForAcquire(c *C) {
        defer cancel()
        rl := requestLimiter{}
 
-       rl.limit = 1
        rl.Acquire(ctx)
+       rl.limit = 1
        ctxShort, cancel := context.WithDeadline(ctx, time.Now().Add(time.Millisecond))
        defer cancel()
        rl.Acquire(ctxShort)
@@ -74,7 +74,7 @@ func (*limiterSuite) TestReducedLimitAndQuietPeriod(c *C) {
                rl.Acquire(ctx)
        }
        rl.Report(&http.Response{StatusCode: http.StatusServiceUnavailable}, nil)
-       c.Check(rl.limit, Equals, int64(3))
+       c.Check(rl.limit, Equals, requestLimiterInitialLimit/2)
        for i := 0; i < 5; i++ {
                rl.Release()
        }
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index ef5a91b270d074fe2064d04808ebf23d02efc4d5..51842b6e2a2e5b177ff29a013e0f3163fcc196d3 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -108,9 +108,10 @@ arvados:
             Password: __INITIAL_USER_PASSWORD__
 
     ### API
-    {%- if "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" != "" %}
+    {%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+    {%- if max_reqs != "" and max_reqs is number %}
     API:
-      MaxConcurrentRequests: __CONTROLLER_MAX_CONCURRENT_REQUESTS__
+      MaxConcurrentRequests: {{ max_reqs }}
     {%- endif %}
 
     ### CONTAINERS
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
index 775ff80fe44450f5ecea970c0005ef0adfb01d4d..7bbf9ae618a3c065e78af5ecfaad8d126c98eb62 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -22,9 +22,11 @@ nginx:
   passenger:
     passenger_ruby: {{ passenger_ruby }}
     passenger_max_pool_size: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-    {%- if "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" != "" %}
-    # Default is 100
-    passenger_max_request_queue_size: __CONTROLLER_MAX_CONCURRENT_REQUESTS__
+    {%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+    {%- if max_reqs != "" and max_reqs is number %}
+    # Default is 100 -- Configuring this a bit higher than API.MaxConcurrentRequests
+    # so that /metrics requests can still be handled even under heavy load.
+    passenger_max_request_queue_size: {{ (max_reqs * 1.1)|round|int }}
     {%- endif %}
 
   ### SERVER
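
A worked example of the new queue-size arithmetic (the substituted value is assumed): if __CONTROLLER_MAX_CONCURRENT_REQUESTS__ is rendered as 128, passenger_max_request_queue_size becomes round(128 * 1.1) = 141, roughly 10% above API.MaxConcurrentRequests, so requests such as /metrics can still be queued while the controller is at its request limit.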