From 2966d83fa4e18eb62d1bbb1c9a0c39d9d845112f Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Thu, 26 Oct 2023 20:59:37 -0400
Subject: [PATCH] 21124: Add separate MaxConcurrentRailsRequests config.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---
 doc/admin/upgrading.html.textile.liquid |  8 +++++++
 lib/config/config.default.yml           |  6 ++++-
 lib/config/export.go                    |  1 +
 lib/service/cmd.go                      | 23 +++++++++++++++----
 lib/service/cmd_test.go                 |  2 +-
 sdk/go/arvados/config.go                |  1 +
 .../multi_host/aws/pillars/arvados.sls  |  3 ++-
 .../aws/pillars/nginx_passenger.sls     |  2 +-
 8 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 46b008ca70..ee7ac4e44e 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -28,6 +28,14 @@ TODO: extract this information based on git commit messages and generate changel
+h2(#main). development main
+
+"previous: Upgrading to 2.7.0":#v2_7_0
+
+The default configuration value @API.MaxConcurrentRequests@ (the number of concurrent requests that will be processed by a single instance of an arvados service process) is raised from 8 to 64.
+
+A new configuration key @API.MaxConcurrentRailsRequests@ (default 8) limits the number of concurrent requests processed by a RailsAPI service process.
+
 h2(#v2_7_0). v2.7.0 (2023-09-21)
 
 "previous: Upgrading to 2.6.3":#v2_6_3
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 32727b1bce..c5a164e790 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,7 +225,11 @@ Clusters:
 
       # Maximum number of concurrent requests to process concurrently
       # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 8
+      MaxConcurrentRequests: 64
+
+      # Maximum number of concurrent requests to process concurrently
+      # in a single RailsAPI service process, or 0 for no limit.
+      MaxConcurrentRailsRequests: 8
 
       # Maximum number of incoming requests to hold in a priority
       # queue waiting for one of the MaxConcurrentRequests slots to be
diff --git a/lib/config/export.go b/lib/config/export.go
index 88c64f69a1..e1f5ff9ee1 100644
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -68,6 +68,7 @@ var whitelist = map[string]bool{
 	"API.KeepServiceRequestTimeout": false,
 	"API.LockBeforeUpdate": false,
 	"API.LogCreateRequestFraction": false,
+	"API.MaxConcurrentRailsRequests": false,
 	"API.MaxConcurrentRequests": false,
 	"API.MaxIndexDatabaseRead": false,
 	"API.MaxItemsPerResponse": true,
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index 854b94861f..725f86f3bd 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -148,6 +148,19 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		return 1
 	}
 
+	maxReqs := cluster.API.MaxConcurrentRequests
+	if maxRails := cluster.API.MaxConcurrentRailsRequests; maxRails > 0 &&
+		(maxRails < maxReqs || maxReqs == 0) &&
+		strings.HasSuffix(prog, "controller") {
+		// Ideally, we would accept up to
+		// MaxConcurrentRequests, and apply the
+		// MaxConcurrentRailsRequests limit only for requests
+		// that require calling upstream to RailsAPI. But for
+		// now we make the simplifying assumption that every
+		// controller request causes an upstream RailsAPI
+		// request.
+		maxReqs = maxRails
+	}
 	instrumented := httpserver.Instrument(reg, log,
 		httpserver.HandlerWithDeadline(cluster.API.RequestTimeout.Duration(),
 			httpserver.AddRequestIDs(
@@ -156,7 +169,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 						interceptHealthReqs(cluster.ManagementToken, handler.CheckHealth,
 							&httpserver.RequestLimiter{
 								Handler: handler,
-								MaxConcurrent: cluster.API.MaxConcurrentRequests,
+								MaxConcurrent: maxReqs,
 								MaxQueue: cluster.API.MaxQueuedRequests,
 								MaxQueueTimeForMinPriority: cluster.API.MaxQueueTimeForLockRequests.Duration(),
 								Priority: c.requestPriority,
@@ -199,7 +212,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		<-handler.Done()
 		srv.Close()
 	}()
-	go c.requestQueueDumpCheck(cluster, prog, reg, &srv.Server, logger)
+	go c.requestQueueDumpCheck(cluster, maxReqs, prog, reg, &srv.Server, logger)
 	err = srv.Wait()
 	if err != nil {
 		return 1
@@ -211,9 +224,9 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 // server's incoming HTTP request queue size. When it exceeds 90% of
 // API.MaxConcurrentRequests, write the /_inspect/requests data to a
 // JSON file in the specified directory.
-func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
+func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, maxReqs int, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
 	outdir := cluster.SystemLogs.RequestQueueDumpDirectory
-	if outdir == "" || cluster.ManagementToken == "" || cluster.API.MaxConcurrentRequests < 1 {
+	if outdir == "" || cluster.ManagementToken == "" || maxReqs < 1 {
 		return
 	}
 	logger = logger.WithField("worker", "RequestQueueDump")
@@ -228,7 +241,7 @@ func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, prog string, r
 		for _, mf := range mfs {
 			if mf.Name != nil && *mf.Name == "arvados_concurrent_requests" && len(mf.Metric) == 1 {
 				n := int(mf.Metric[0].GetGauge().GetValue())
-				if n > 0 && n >= cluster.API.MaxConcurrentRequests*9/10 {
+				if n > 0 && n >= maxReqs*9/10 {
 					dump = true
 					break
 				}
diff --git a/lib/service/cmd_test.go b/lib/service/cmd_test.go
index 97a6bd8a4c..ee0d4bb836 100644
--- a/lib/service/cmd_test.go
+++ b/lib/service/cmd_test.go
@@ -211,7 +211,7 @@ Clusters:
     SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
     ManagementToken: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
     API:
-      MaxConcurrentRequests: %d
+      MaxConcurrentRailsRequests: %d
       MaxQueuedRequests: 0
     SystemLogs: {RequestQueueDumpDirectory: %q}
     Services:
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index a3e54952da..6e6c5298e4 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -99,6 +99,7 @@ type Cluster struct {
 		DisabledAPIs StringSet
 		MaxIndexDatabaseRead int
 		MaxItemsPerResponse int
+		MaxConcurrentRailsRequests int
 		MaxConcurrentRequests int
 		MaxQueuedRequests int
 		MaxQueueTimeForLockRequests Duration
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index 84df363c2e..064a70a8ed 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -117,7 +117,8 @@ arvados:
 
     ### API
     API:
-      MaxConcurrentRequests: {{ max_workers * 2 }}
+      MaxConcurrentRailsRequests: {{ max_workers * 2 }}
+      MaxConcurrentRequests: {{ max_reqs }}
       MaxQueuedRequests: {{ max_reqs }}
 
     ### CONTAINERS
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
index 4c0aea25fe..de4c830906 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -29,7 +29,7 @@ nginx:
     # Make the passenger queue small (twice the concurrency, so
     # there's at most one pending request for each busy worker)
     # because controller reorders requests based on priority, and
-    # won't send more than API.MaxConcurrentRequests to passenger
+    # won't send more than API.MaxConcurrentRailsRequests to passenger
     # (which is max_workers * 2), so things that are moved to the head
     # of the line get processed quickly.
     passenger_max_request_queue_size: {{ max_workers * 2 + 1 }}
-- 
2.30.2
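The limit-selection rule added to lib/service/cmd.go above can be read in isolation as the short Go sketch below. This is not part of the patch: the helper name effectiveMaxConcurrent and the sample program names are illustrative only, while the rule itself and the defaults (64 and 8) come from the diff above.

package main

import (
	"fmt"
	"strings"
)

// effectiveMaxConcurrent mirrors the rule added in RunCommand: only a
// process whose name ends in "controller" is capped by
// MaxConcurrentRailsRequests, and only when that cap is lower than
// MaxConcurrentRequests (or when MaxConcurrentRequests is 0, meaning
// unlimited). Every other service keeps MaxConcurrentRequests.
func effectiveMaxConcurrent(prog string, maxReqs, maxRails int) int {
	if maxRails > 0 && (maxRails < maxReqs || maxReqs == 0) && strings.HasSuffix(prog, "controller") {
		return maxRails
	}
	return maxReqs
}

func main() {
	// With the new defaults (MaxConcurrentRequests: 64, MaxConcurrentRailsRequests: 8):
	fmt.Println(effectiveMaxConcurrent("arvados-controller", 64, 8)) // 8: every controller request is assumed to reach RailsAPI
	fmt.Println(effectiveMaxConcurrent("arvados-ws", 64, 8))         // 64: non-controller services ignore the Rails cap
	fmt.Println(effectiveMaxConcurrent("arvados-controller", 0, 8))  // 8: the Rails cap still applies when requests are otherwise unlimited
}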
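The requestQueueDumpCheck change keeps the existing 90% trigger but measures it against that same effective limit. The sketch below reproduces the check against a locally constructed Prometheus registry; the gauge value, the hard-coded maxReqs, and the printed message are illustrative stand-ins for the service's real metrics and logging.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Stand-in for the service's metrics registry and its
	// arvados_concurrent_requests gauge.
	reg := prometheus.NewRegistry()
	concurrent := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "arvados_concurrent_requests",
		Help: "Number of requests in progress",
	})
	reg.MustRegister(concurrent)
	concurrent.Set(59) // illustrative value

	maxReqs := 64 // the effective limit computed in RunCommand
	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		if mf.Name != nil && *mf.Name == "arvados_concurrent_requests" && len(mf.Metric) == 1 {
			n := int(mf.Metric[0].GetGauge().GetValue())
			// Same trigger as the patched check: dump /_inspect/requests
			// once the gauge reaches 90% of the effective limit.
			if n > 0 && n >= maxReqs*9/10 {
				fmt.Printf("would dump request queue: %d of %d slots in use\n", n, maxReqs)
			}
		}
	}
}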