Merge branch '20520-instance-init-command'
author Tom Clegg <tom@curii.com>
Fri, 26 May 2023 19:30:25 +0000 (15:30 -0400)
committer Tom Clegg <tom@curii.com>
Fri, 26 May 2023 19:30:25 +0000 (15:30 -0400)
refs #20520

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

doc/admin/metrics.html.textile.liquid
lib/controller/integration_test.go
lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/run_queue_test.go
sdk/go/arvados/client.go
sdk/go/arvados/limiter.go
sdk/go/arvados/limiter_test.go
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls

diff --git a/doc/admin/metrics.html.textile.liquid b/doc/admin/metrics.html.textile.liquid
index b140bcc1badda0c2996725bf62a026345c0646c6..79d895ae48df1bf53d8ec2c0e1c6eafc3f64f237 100644
--- a/doc/admin/metrics.html.textile.liquid
+++ b/doc/admin/metrics.html.textile.liquid
@@ -31,7 +31,7 @@ When configuring Prometheus, use a @bearer_token@ or @bearer_token_file@ option
 
 table(table table-bordered table-condensed table-hover).
 |_. Component|_. Metrics endpoint|
-|arvados-api-server||
+|arvados-api-server||
 |arvados-controller|✓|
 |arvados-dispatch-cloud|✓|
 |arvados-dispatch-lsf|✓|
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 12fc50089df0cc2cb9b926e4f78acccfc936321a..0557aa3fddde824ea6f3cd1f7c1f8e09a974da2b 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -501,6 +501,7 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithFedToken(c *check.C) {
        req.Header.Set("Authorization", "OAuth2 "+ac2.AuthToken)
        resp, err = arvados.InsecureHTTPClient.Do(req)
        c.Assert(err, check.IsNil)
+       defer resp.Body.Close()
        err = json.NewDecoder(resp.Body).Decode(&cr)
        c.Check(err, check.IsNil)
        c.Check(cr.UUID, check.Matches, "z2222-.*")
@@ -538,8 +539,10 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithBadToken(c *check.C) {
                c.Assert(err, check.IsNil)
                req.Header.Set("Content-Type", "application/json")
                resp, err := ac1.Do(req)
-               c.Assert(err, check.IsNil)
-               c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+               if c.Check(err, check.IsNil) {
+                       c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+                       resp.Body.Close()
+               }
        }
 }
 
@@ -607,9 +610,11 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
                        var jresp httpserver.ErrorResponse
                        err := json.NewDecoder(resp.Body).Decode(&jresp)
                        c.Check(err, check.IsNil)
-                       c.Assert(jresp.Errors, check.HasLen, 1)
-                       c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       if c.Check(jresp.Errors, check.HasLen, 1) {
+                               c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       }
                }
+               resp.Body.Close()
        }
 }
 
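A note on the pattern behind these test fixes: check the request error before dereferencing the response, and always close the response body so the HTTP client can reuse the connection. A minimal standalone sketch follows (the function name and URL are illustrative, not part of the Arvados test suite):

package main

import (
        "fmt"
        "io"
        "net/http"
)

// fetchStatus shows the hygiene applied above: resp is only used after err is
// known to be nil, and resp.Body is always closed.
func fetchStatus(url string) (int, error) {
        resp, err := http.Get(url)
        if err != nil {
                return 0, err // resp may be nil here; never touch it before this check
        }
        defer resp.Body.Close()
        if _, err := io.Copy(io.Discard, resp.Body); err != nil {
                return 0, err
        }
        return resp.StatusCode, nil
}

func main() {
        code, err := fetchStatus("https://example.com/")
        fmt.Println(code, err)
}
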
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 8f4c2e083da584a869c7b8c34bb8d69434ad1584..dcb348878dcb5404eac881e6a3301c61a58e4ee1 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -16,12 +16,25 @@ import (
 var quietAfter503 = time.Minute
 
 func (sch *Scheduler) runQueue() {
+       running := sch.pool.Running()
+       unalloc := sch.pool.Unallocated()
+
        unsorted, _ := sch.queue.Entries()
        sorted := make([]container.QueueEnt, 0, len(unsorted))
        for _, ent := range unsorted {
                sorted = append(sorted, ent)
        }
        sort.Slice(sorted, func(i, j int) bool {
+               _, irunning := running[sorted[i].Container.UUID]
+               _, jrunning := running[sorted[j].Container.UUID]
+               if irunning != jrunning {
+                       // Ensure the "tryrun" loop (see below) sees
+                       // already-scheduled containers first, to
+                       // ensure existing supervisor containers are
+                       // properly counted before we decide whether
+                       // we have room for new ones.
+                       return irunning
+               }
                ilocked := sorted[i].Container.State == arvados.ContainerStateLocked
                jlocked := sorted[j].Container.State == arvados.ContainerStateLocked
                if ilocked != jlocked {
@@ -46,9 +59,6 @@ func (sch *Scheduler) runQueue() {
                }
        })
 
-       running := sch.pool.Running()
-       unalloc := sch.pool.Unallocated()
-
        if t := sch.client.Last503(); t.After(sch.last503time) {
                // API has sent an HTTP 503 response since last time
                // we checked. Use current #containers - 1 as
@@ -120,8 +130,7 @@ tryrun:
                case arvados.ContainerStateQueued:
                        if sch.maxConcurrency > 0 && trying >= sch.maxConcurrency {
                                logger.Tracef("not locking: already at maxConcurrency %d", sch.maxConcurrency)
-                               overquota = sorted[i:]
-                               break tryrun
+                               continue
                        }
                        trying++
                        if unalloc[it] < 1 && sch.pool.AtQuota() {
@@ -137,9 +146,8 @@ tryrun:
                        unalloc[it]--
                case arvados.ContainerStateLocked:
                        if sch.maxConcurrency > 0 && trying >= sch.maxConcurrency {
-                               logger.Debugf("not starting: already at maxConcurrency %d", sch.maxConcurrency)
-                               overquota = sorted[i:]
-                               break tryrun
+                               logger.Tracef("not starting: already at maxConcurrency %d", sch.maxConcurrency)
+                               continue
                        }
                        trying++
                        if unalloc[it] > 0 {
@@ -206,6 +214,8 @@ tryrun:
                                }
                        }
                }
+       }
+       if len(overquota) > 0 {
                // Shut down idle workers that didn't get any
                // containers mapped onto them before we hit quota.
                for it, n := range unalloc {
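
The comparator change above is easier to see in isolation. The sketch below is illustrative only (the types and names are simplified stand-ins, not the scheduler's real ones): containers already scheduled on an instance sort first, then Locked containers, then higher priority, with UUID as the first-come-first-served tie-break the pre-existing comparator already used.

package main

import (
        "fmt"
        "sort"
)

type ent struct {
        UUID     string
        Locked   bool
        Priority int64
}

func main() {
        // ctr-3 is already running on an instance, so it must be considered
        // first even though ctr-2 has higher priority.
        running := map[string]bool{"ctr-3": true}
        queue := []ent{
                {UUID: "ctr-1", Locked: true, Priority: 500},
                {UUID: "ctr-2", Locked: false, Priority: 900},
                {UUID: "ctr-3", Locked: true, Priority: 100},
        }
        sort.Slice(queue, func(i, j int) bool {
                if irun, jrun := running[queue[i].UUID], running[queue[j].UUID]; irun != jrun {
                        return irun // key 1: already-scheduled containers first
                }
                if queue[i].Locked != queue[j].Locked {
                        return queue[i].Locked // key 2: Locked before Queued
                }
                if queue[i].Priority != queue[j].Priority {
                        return queue[i].Priority > queue[j].Priority // key 3: higher priority first
                }
                return queue[i].UUID < queue[j].UUID // key 4: FCFS tie-break
        })
        for _, e := range queue {
                fmt.Println(e.UUID) // prints ctr-3, ctr-1, ctr-2 in that order
        }
}
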
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 3278c7de69333926dfea8363b3f41626be294a21..73602f810971d6c109679b206f9b5bedbcb38945 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -278,6 +278,214 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
        }
 }
 
+// Don't unlock containers or shut down unalloc (booting/idle) nodes
+// just because some 503 errors caused us to reduce maxConcurrency
+// below the current load level.
+//
+// We expect to raise maxConcurrency soon when we stop seeing 503s. If
+// that doesn't happen soon, the idle timeout will take care of the
+// excess nodes.
+func (*SchedulerSuite) TestIdleIn503QuietPeriod(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+               Containers: []arvados.Container{
+                       // scheduled on an instance (but not Running yet)
+                       {
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       // not yet scheduled
+                       {
+                               UUID:     test.ContainerUUID(2),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       // scheduled on an instance (but not Running yet)
+                       {
+                               UUID:     test.ContainerUUID(3),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+                       // not yet scheduled
+                       {
+                               UUID:     test.ContainerUUID(4),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+                       // not yet locked
+                       {
+                               UUID:     test.ContainerUUID(5),
+                               Priority: 1000,
+                               State:    arvados.ContainerStateQueued,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 3,
+                                       RAM:   3 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+                       test.InstanceType(3): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+                       test.InstanceType(3): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(1): {},
+                       test.ContainerUUID(3): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 0)
+       sch.last503time = time.Now()
+       sch.maxConcurrency = 3
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.HasLen, 0)
+}
+
+// If we somehow have more supervisor containers in Locked state than
+// we should (e.g., config changed since they started), and some
+// appropriate-sized instances booting up, unlock the excess
+// supervisor containers, but let the instances keep booting.
+func (*SchedulerSuite) TestUnlockExcessSupervisors(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+       }
+       for i := 1; i <= 6; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i),
+                       Priority: int64(1000 - i),
+                       State:    arvados.ContainerStateLocked,
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               VCPUs: 2,
+                               RAM:   2 << 30,
+                       },
+                       SchedulingParameters: arvados.SchedulingParameters{
+                               Supervisor: true,
+                       },
+               })
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(1): {},
+                       test.ContainerUUID(2): {},
+                       test.ContainerUUID(3): {},
+                       test.ContainerUUID(4): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 4)
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.DeepEquals, []string{})
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.DeepEquals, []test.QueueStateChange{
+               {UUID: test.ContainerUUID(5), From: "Locked", To: "Queued"},
+               {UUID: test.ContainerUUID(6), From: "Locked", To: "Queued"},
+       })
+}
+
+// Assuming we're not at quota, don't try to shut down idle nodes
+// merely because we have more queued/locked supervisor containers
+// than MaxSupervisors -- it won't help.
+func (*SchedulerSuite) TestExcessSupervisors(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       queue := test.Queue{
+               ChooseType: chooseType,
+       }
+       for i := 1; i <= 8; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i),
+                       Priority: int64(1000 + i),
+                       State:    arvados.ContainerStateQueued,
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               VCPUs: 2,
+                               RAM:   2 << 30,
+                       },
+                       SchedulingParameters: arvados.SchedulingParameters{
+                               Supervisor: true,
+                       },
+               })
+       }
+       for i := 2; i < 4; i++ {
+               queue.Containers[i].State = arvados.ContainerStateLocked
+       }
+       for i := 4; i < 6; i++ {
+               queue.Containers[i].State = arvados.ContainerStateRunning
+       }
+       queue.Update()
+       pool := stubPool{
+               quota: 16,
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(2): 1,
+               },
+               running: map[string]time.Time{
+                       test.ContainerUUID(5): {},
+                       test.ContainerUUID(6): {},
+               },
+               creates:   []arvados.InstanceType{},
+               starts:    []string{},
+               canCreate: 0,
+       }
+       sch := New(ctx, arvados.NewClientFromEnv(), &queue, &pool, nil, time.Millisecond, time.Millisecond, 4)
+       sch.sync()
+       sch.runQueue()
+       sch.sync()
+
+       c.Check(pool.starts, check.HasLen, 2)
+       c.Check(pool.shutdowns, check.Equals, 0)
+       c.Check(pool.creates, check.HasLen, 0)
+       c.Check(queue.StateChanges(), check.HasLen, 0)
+}
+
 // Don't flap lock/unlock when equal-priority containers compete for
 // limited workers.
 //
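
These new scheduler cases use gopkg.in/check.v1 like the rest of the suite, so (assuming a standard Arvados development environment) an individual case can be run with gocheck's filter flag, for example:

go test ./lib/dispatchcloud/scheduler -check.f TestIdleIn503QuietPeriod
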
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index d1a87e30e8c33408d0b9c51d18fc6ddc28466aac..6316d1bedaceb35bc7d3f5578aff3425078aabb7 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -142,11 +142,12 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
                }
        }
        return &Client{
-               Client:   hc,
-               Scheme:   ctrlURL.Scheme,
-               APIHost:  ctrlURL.Host,
-               Insecure: cluster.TLS.Insecure,
-               Timeout:  5 * time.Minute,
+               Client:         hc,
+               Scheme:         ctrlURL.Scheme,
+               APIHost:        ctrlURL.Host,
+               Insecure:       cluster.TLS.Insecure,
+               Timeout:        5 * time.Minute,
+               requestLimiter: requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
        }, nil
 }
 
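For a concrete sense of the new ceiling (the figure is an assumed example, not taken from this diff): with API.MaxConcurrentRequests set to 64, the client returned by NewClientFromConfig caps its own requestLimiter at 64/4 = 16 concurrent outstanding requests, so a single internal client cannot consume more than a quarter of the controller's configured capacity.
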
diff --git a/sdk/go/arvados/limiter.go b/sdk/go/arvados/limiter.go
index f62264c636f96dfa4b55ff0d581fc8cd50b05c09..dc944160ab2dd5d459a31fa2386ebe9a5f6bb2c3 100644
--- a/sdk/go/arvados/limiter.go
+++ b/sdk/go/arvados/limiter.go
@@ -13,11 +13,15 @@ import (
        "time"
 )
 
-var requestLimiterQuietPeriod = time.Second
+var (
+       requestLimiterQuietPeriod        = time.Second
+       requestLimiterInitialLimit int64 = 8
+)
 
 type requestLimiter struct {
        current    int64
        limit      int64
+       maxlimit   int64
        lock       sync.Mutex
        cond       *sync.Cond
        quietUntil time.Time
@@ -33,6 +37,7 @@ func (rl *requestLimiter) Acquire(ctx context.Context) {
        if rl.cond == nil {
                // First use of requestLimiter. Initialize.
                rl.cond = sync.NewCond(&rl.lock)
+               rl.limit = requestLimiterInitialLimit
        }
        // Wait out the quiet period(s) immediately following a 503.
        for ctx.Err() == nil {
@@ -137,9 +142,12 @@ func (rl *requestLimiter) Report(resp *http.Response, err error) bool {
                        increase = 1
                }
                rl.limit += increase
-               if max := rl.current * 2; max > rl.limit {
+               if max := rl.current * 2; max < rl.limit {
                        rl.limit = max
                }
+               if rl.maxlimit > 0 && rl.maxlimit < rl.limit {
+                       rl.limit = rl.maxlimit
+               }
                rl.cond.Broadcast()
        }
        return false
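
Taken together, the limiter changes mean the client-side limit starts at 8 (instead of being effectively unbounded before the first Report), never exceeds the maxlimit derived from the cluster configuration, and on success grows but is clamped to twice the number of requests actually in flight. The standalone model below is illustrative only; the names echo the real fields, and the halve-on-503 rule is an assumption consistent with the updated limiter_test.go expectation further down.

package main

import "fmt"

// limiterModel mimics the sizing rules visible in this diff; it is not the
// production requestLimiter.
type limiterModel struct {
        current, limit, maxlimit int64
}

func (m *limiterModel) onSuccess(increase int64) {
        if increase < 1 {
                increase = 1
        }
        m.limit += increase
        // Never let the limit run more than 2x ahead of actual concurrency...
        if max := m.current * 2; max < m.limit {
                m.limit = max
        }
        // ...and never past the configured ceiling (MaxConcurrentRequests/4).
        if m.maxlimit > 0 && m.maxlimit < m.limit {
                m.limit = m.maxlimit
        }
}

func (m *limiterModel) on503() {
        // Assumed rule: halve the limit after a 503, which matches the test's
        // expectation of requestLimiterInitialLimit/2.
        m.limit /= 2
        if m.limit < 1 {
                m.limit = 1
        }
}

func main() {
        m := limiterModel{current: 5, limit: 8, maxlimit: 16} // limit starts at 8
        m.on503()
        fmt.Println(m.limit) // 4

        m.current = 4
        for i := 0; i < 10; i++ {
                m.onSuccess(1)
        }
        fmt.Println(m.limit) // 8: clamped to current*2, still below maxlimit
}
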
diff --git a/sdk/go/arvados/limiter_test.go b/sdk/go/arvados/limiter_test.go
index d32ab9699999d97a7dad2c63c4332e937419f739..1e73b1c28f44555da1be00ca926ccc7e9c0f7946 100644
--- a/sdk/go/arvados/limiter_test.go
+++ b/sdk/go/arvados/limiter_test.go
@@ -18,23 +18,23 @@ var _ = Suite(&limiterSuite{})
 
 type limiterSuite struct{}
 
-func (*limiterSuite) TestUnlimitedBeforeFirstReport(c *C) {
+func (*limiterSuite) TestInitialLimit(c *C) {
        ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute))
        defer cancel()
        rl := requestLimiter{}
 
        var wg sync.WaitGroup
-       wg.Add(1000)
-       for i := 0; i < 1000; i++ {
+       wg.Add(int(requestLimiterInitialLimit))
+       for i := int64(0); i < requestLimiterInitialLimit; i++ {
                go func() {
                        rl.Acquire(ctx)
                        wg.Done()
                }()
        }
        wg.Wait()
-       c.Check(rl.current, Equals, int64(1000))
-       wg.Add(1000)
-       for i := 0; i < 1000; i++ {
+       c.Check(rl.current, Equals, requestLimiterInitialLimit)
+       wg.Add(int(requestLimiterInitialLimit))
+       for i := int64(0); i < requestLimiterInitialLimit; i++ {
                go func() {
                        rl.Release()
                        wg.Done()
@@ -49,8 +49,8 @@ func (*limiterSuite) TestCancelWhileWaitingForAcquire(c *C) {
        defer cancel()
        rl := requestLimiter{}
 
-       rl.limit = 1
        rl.Acquire(ctx)
+       rl.limit = 1
        ctxShort, cancel := context.WithDeadline(ctx, time.Now().Add(time.Millisecond))
        defer cancel()
        rl.Acquire(ctxShort)
@@ -74,7 +74,7 @@ func (*limiterSuite) TestReducedLimitAndQuietPeriod(c *C) {
                rl.Acquire(ctx)
        }
        rl.Report(&http.Response{StatusCode: http.StatusServiceUnavailable}, nil)
-       c.Check(rl.limit, Equals, int64(3))
+       c.Check(rl.limit, Equals, requestLimiterInitialLimit/2)
        for i := 0; i < 5; i++ {
                rl.Release()
        }
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index ef5a91b270d074fe2064d04808ebf23d02efc4d5..51842b6e2a2e5b177ff29a013e0f3163fcc196d3 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -108,9 +108,10 @@ arvados:
             Password: __INITIAL_USER_PASSWORD__
 
     ### API
-    {%- if "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" != "" %}
+    {%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+    {%- if max_reqs != "" and max_reqs is number %}
     API:
-      MaxConcurrentRequests: __CONTROLLER_MAX_CONCURRENT_REQUESTS__
+      MaxConcurrentRequests: {{ max_reqs }}
     {%- endif %}
 
     ### CONTAINERS
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
index 775ff80fe44450f5ecea970c0005ef0adfb01d4d..7bbf9ae618a3c065e78af5ecfaad8d126c98eb62 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -22,9 +22,11 @@ nginx:
   passenger:
     passenger_ruby: {{ passenger_ruby }}
     passenger_max_pool_size: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-    {%- if "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" != "" %}
-    # Default is 100
-    passenger_max_request_queue_size: __CONTROLLER_MAX_CONCURRENT_REQUESTS__
+    {%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+    {%- if max_reqs != "" and max_reqs is number %}
+    # Default is 100 -- Configuring this a bit higher than API.MaxConcurrentRequests
+    # so that /metrics requests can still be handled even under heavy load.
+    passenger_max_request_queue_size: {{ (max_reqs * 1.1)|round|int }}
     {%- endif %}
 
   ### SERVER
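
A worked example of the new queue-size arithmetic (the substituted value is assumed): if __CONTROLLER_MAX_CONCURRENT_REQUESTS__ is rendered as 128, passenger_max_request_queue_size becomes round(128 * 1.1) = 141, roughly 10% above API.MaxConcurrentRequests, so requests such as /metrics can still be queued while the controller is at its request limit.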