21124: Add separate MaxConcurrentRailsRequests config.
author: Tom Clegg <tom@curii.com>
Fri, 27 Oct 2023 00:59:37 +0000 (20:59 -0400)
committer: Tom Clegg <tom@curii.com>
Fri, 27 Oct 2023 00:59:37 +0000 (20:59 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

doc/admin/upgrading.html.textile.liquid
lib/config/config.default.yml
lib/config/export.go
lib/service/cmd.go
lib/service/cmd_test.go
sdk/go/arvados/config.go
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls

index 46b008ca70d4e6a45c9408fec69f64ba18b7d239..ee7ac4e44e97aeebb860868e872f7b57270fa879 100644 (file)
@@ -28,6 +28,14 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
+h2(#main). development main
+
+"previous: Upgrading to 2.7.0":#v2_7_0
+
+The default configuration value @API.MaxConcurrentRequests@ (the number of concurrent requests that will be processed by a single instance of an arvados service process) is raised from 8 to 64.
+
+A new configuration key @API.MaxConcurrentRailsRequests@ (default 8) limits the number of concurrent requests processed by a RailsAPI service process.
+
 h2(#v2_7_0). v2.7.0 (2023-09-21)
 
 "previous: Upgrading to 2.6.3":#v2_6_3
index 32727b1bce78226ac34e27d3eb9791c173a34cbc..c5a164e7906902259841aae6f39bef5a0e77db50 100644 (file)
@@ -225,7 +225,11 @@ Clusters:
 
       # Maximum number of concurrent requests to process concurrently
       # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 8
+      MaxConcurrentRequests: 64
+
+      # Maximum number of requests to process concurrently in a
+      # single RailsAPI service process, or 0 for no limit.
+      MaxConcurrentRailsRequests: 8
 
       # Maximum number of incoming requests to hold in a priority
       # queue waiting for one of the MaxConcurrentRequests slots to be
index 88c64f69a10cf66d641db65f96a51de8a38c7dfd..e1f5ff9ee13473d77328e8c8521d9d4370b76483 100644 (file)
@@ -68,6 +68,7 @@ var whitelist = map[string]bool{
        "API.KeepServiceRequestTimeout":            false,
        "API.LockBeforeUpdate":                     false,
        "API.LogCreateRequestFraction":             false,
+       "API.MaxConcurrentRailsRequests":           false,
        "API.MaxConcurrentRequests":                false,
        "API.MaxIndexDatabaseRead":                 false,
        "API.MaxItemsPerResponse":                  true,
index 854b94861f1362f9e58592f771400bed28b9afaa..725f86f3bda5c2a82476615ba9ecd6e7a9b7a4fa 100644 (file)
@@ -148,6 +148,19 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                return 1
        }
 
+       maxReqs := cluster.API.MaxConcurrentRequests
+       if maxRails := cluster.API.MaxConcurrentRailsRequests; maxRails > 0 &&
+               (maxRails < maxReqs || maxReqs == 0) &&
+               strings.HasSuffix(prog, "controller") {
+               // Ideally, we would accept up to
+               // MaxConcurrentRequests, and apply the
+               // MaxConcurrentRailsRequests limit only for requests
+               // that require calling upstream to RailsAPI. But for
+               // now we make the simplifying assumption that every
+               // controller request causes an upstream RailsAPI
+               // request.
+               maxReqs = maxRails
+       }
        instrumented := httpserver.Instrument(reg, log,
                httpserver.HandlerWithDeadline(cluster.API.RequestTimeout.Duration(),
                        httpserver.AddRequestIDs(
@@ -156,7 +169,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                                                interceptHealthReqs(cluster.ManagementToken, handler.CheckHealth,
                                                        &httpserver.RequestLimiter{
                                                                Handler:                    handler,
-                                                               MaxConcurrent:              cluster.API.MaxConcurrentRequests,
+                                                               MaxConcurrent:              maxReqs,
                                                                MaxQueue:                   cluster.API.MaxQueuedRequests,
                                                                MaxQueueTimeForMinPriority: cluster.API.MaxQueueTimeForLockRequests.Duration(),
                                                                Priority:                   c.requestPriority,
@@ -199,7 +212,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                <-handler.Done()
                srv.Close()
        }()
-       go c.requestQueueDumpCheck(cluster, prog, reg, &srv.Server, logger)
+       go c.requestQueueDumpCheck(cluster, maxReqs, prog, reg, &srv.Server, logger)
        err = srv.Wait()
        if err != nil {
                return 1
@@ -211,9 +224,9 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 // server's incoming HTTP request queue size. When it exceeds 90% of
-// API.MaxConcurrentRequests, write the /_inspect/requests data to a
+// the effective limit (maxReqs), write the /_inspect/requests data to a
 // JSON file in the specified directory.
-func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
+func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, maxReqs int, prog string, reg *prometheus.Registry, srv *http.Server, logger logrus.FieldLogger) {
        outdir := cluster.SystemLogs.RequestQueueDumpDirectory
-       if outdir == "" || cluster.ManagementToken == "" || cluster.API.MaxConcurrentRequests < 1 {
+       if outdir == "" || cluster.ManagementToken == "" || maxReqs < 1 {
                return
        }
        logger = logger.WithField("worker", "RequestQueueDump")
@@ -228,7 +241,7 @@ func (c *command) requestQueueDumpCheck(cluster *arvados.Cluster, prog string, r
                for _, mf := range mfs {
                        if mf.Name != nil && *mf.Name == "arvados_concurrent_requests" && len(mf.Metric) == 1 {
                                n := int(mf.Metric[0].GetGauge().GetValue())
-                               if n > 0 && n >= cluster.API.MaxConcurrentRequests*9/10 {
+                               if n > 0 && n >= maxReqs*9/10 {
                                        dump = true
                                        break
                                }
index 97a6bd8a4c979cd26b39c4c2cd93c75e82dbcd13..ee0d4bb836339897e43e5877e7e83701a19ea4ba 100644 (file)
@@ -211,7 +211,7 @@ Clusters:
   SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
   ManagementToken: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
   API:
-   MaxConcurrentRequests: %d
+   MaxConcurrentRailsRequests: %d
    MaxQueuedRequests: 0
   SystemLogs: {RequestQueueDumpDirectory: %q}
   Services:
index a3e54952da483c46a87416b200291bc65b66304b..6e6c5298e46f0f5c61b5a978b312aaa49c70c62e 100644 (file)
@@ -99,6 +99,7 @@ type Cluster struct {
                DisabledAPIs                     StringSet
                MaxIndexDatabaseRead             int
                MaxItemsPerResponse              int
+               MaxConcurrentRailsRequests       int
                MaxConcurrentRequests            int
                MaxQueuedRequests                int
                MaxQueueTimeForLockRequests      Duration
index 84df363c2e645c594e25378a7de3fe187dcb2069..064a70a8ed47d70b38c13bd531bcc07db30c8fbf 100644 (file)
@@ -117,7 +117,8 @@ arvados:
 
     ### API
     API:
-      MaxConcurrentRequests: {{ max_workers * 2 }}
+      MaxConcurrentRailsRequests: {{ max_workers * 2 }}
+      MaxConcurrentRequests: {{ max_reqs }}
       MaxQueuedRequests: {{ max_reqs }}
 
     ### CONTAINERS
index 4c0aea25fe7ada8c6b9cf0f0853df8f56fdd2f52..de4c830906ff3418a22fe06175173424b816aec1 100644 (file)
@@ -29,7 +29,7 @@ nginx:
     # Make the passenger queue small (twice the concurrency, so
     # there's at most one pending request for each busy worker)
     # because controller reorders requests based on priority, and
-    # won't send more than API.MaxConcurrentRequests to passenger
+    # won't send more than API.MaxConcurrentRailsRequests to passenger
     # (which is max_workers * 2), so things that are moved to the head
     # of the line get processed quickly.
     passenger_max_request_queue_size: {{ max_workers * 2 + 1 }}