20680: Rework worker settings for new controller behavior

author Peter Amstutz <peter.amstutz@curii.com>

Thu, 3 Aug 2023 18:26:51 +0000 (14:26 -0400)

committer Peter Amstutz <peter.amstutz@curii.com>

Fri, 4 Aug 2023 15:21:35 +0000 (11:21 -0400)
author Peter Amstutz <peter.amstutz@curii.com>
Thu, 3 Aug 2023 18:26:51 +0000 (14:26 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Fri, 4 Aug 2023 15:21:35 +0000 (11:21 -0400)
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml

index 49d62e2980dcee86825d0af8f35b5c1377505f9a..1df8f1310dbec93392c9cb8a70f6d488cf44cbe9 100644 (file)
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,7 +225,7 @@ Clusters:
  
        # Maximum number of concurrent requests to process concurrently
        # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 64
+      MaxConcurrentRequests: 4
  
        # Maximum number of incoming requests to hold in a priority
        # queue waiting for one of the MaxConcurrentRequests slots to be
@@ -234,7 +234,7 @@ Clusters:
        #
        # If MaxQueuedRequests is 0, respond 503 immediately to
        # additional requests while at the MaxConcurrentRequests limit.
-      MaxQueuedRequests: 64
+      MaxQueuedRequests: 128
  
        # Maximum time a "lock container" request is allowed to wait in
        # the incoming request queue before returning 503.
@@ -1074,7 +1074,7 @@ Clusters:
  
        # Number of times a container can be unlocked before being
        # automatically cancelled.
-      MaxDispatchAttempts: 5
+      MaxDispatchAttempts: 10
  
        # Default value for container_count_max for container requests.  This is the
        # number of times Arvados will create a new container to satisfy a container
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls

index fadf4098678e4e3b862d9c62014dc91954007be9..58a7851c28a7ea2c9da1d6112816dcea85240bd0 100644 (file)
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -3,7 +3,8 @@
  #
  # SPDX-License-Identifier: AGPL-3.0
  
-{%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+{%- set max_workers = ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int %}
+{%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 128)|int %}
  
  # The variables commented out are the default values that the formula uses.
  # The uncommented values are REQUIRED values. If you don't set them, running
@@ -110,10 +111,9 @@ arvados:
              Password: __INITIAL_USER_PASSWORD__
  
      ### API
-    {%- if max_reqs != "" %}
      API:
-      MaxConcurrentRequests: {{ max_reqs|int }}
-    {%- endif %}
+      MaxConcurrentRequests: {{ max_workers * 2 }}
+      MaxQueuedRequests: {{ max_reqs }}
  
      ### CONTAINERS
      {%- set dispatcher_ssh_privkey = "__DISPATCHER_SSH_PRIVKEY__" %}
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls

index b003172330e465173e1f1f200449d5f1bb6a171a..47c79afc4fb6b3ed794fc7581cdc9421e6499e3c 100644 (file)
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -12,7 +12,8 @@
  {%- set passenger_ruby = '/usr/local/rvm/wrappers/default/ruby'
                             if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04', 'Debian-10') else
                           '/usr/bin/ruby' %}
-{%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+{%- set max_workers = ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int %}
+{%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 1024)|int %}
  
  ### NGINX
  nginx:
@@ -22,12 +23,15 @@ nginx:
    ### PASSENGER
    passenger:
      passenger_ruby: {{ passenger_ruby }}
-    passenger_max_pool_size: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-    {%- if max_reqs != "" %}
-    # Default is 100 -- Configuring this a bit higher than API.MaxConcurrentRequests
-    # to be able to handle /metrics requests even on heavy load situations.
-    passenger_max_request_queue_size: {{ (max_reqs|int * 1.1)|round|int }}
-    {%- endif %}
+    passenger_max_pool_size: {{ max_workers }}
+
+    # Keep the passenger queue small (about twice the worker count, so
+    # there is at most one pending request per busy worker).
+    # Controller reorders requests by priority and never sends more
+    # than API.MaxConcurrentRequests (max_workers * 2) to passenger
+    # at once, so a short queue lets requests moved to the head of
+    # the line get processed quickly.
+    passenger_max_request_queue_size: {{ max_workers * 2 + 1 }}
  
    ### SERVER
    server:
@@ -43,16 +47,15 @@ nginx:
        # include: 'modules-enabled/*.conf'
        load_module: {{ passenger_mod }}
        {% endif %}
-      worker_processes: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-      {%- if max_reqs != "" %}
-      worker_rlimit_nofile: {{ (max_reqs|int * 3)|round|int }}
-      events:
-        worker_connections: {{ (max_reqs|int * 3)|round|int }}
-      {%- else %}
-      worker_rlimit_nofile: 4096
+      worker_processes: {{ max_workers }}
+
+      # Each request uses up to 3 connections (1 with the client, 1
+      # proxied to controller, and potentially 1 from controller back
+      # to passenger), and each connection consumes a file descriptor,
+      # hence the "max_reqs * 3 + 1" values below.
+      worker_rlimit_nofile: {{ max_reqs * 3 + 1 }}
        events:
-        worker_connections: 1024
-      {%- endif %}
+        worker_connections: {{ max_reqs * 3 + 1 }}
  
    ### SNIPPETS
    snippets:
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts

index 2c3d3c616931f20d7bea31ab498002013aabe2d0..12da2e3ed683b16b8d3b671a7cbeb793b8d41fab 100644 (file)
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -140,8 +140,8 @@ SHELL_INT_IP=10.1.2.17
  DISABLED_CONTROLLER=""
  
  # Performance tuning parameters
-#CONTROLLER_NGINX_WORKERS=
-CONTROLLER_MAX_CONCURRENT_REQUESTS=64
+#CONTROLLER_MAX_WORKERS=
+#CONTROLLER_MAX_QUEUED_REQUESTS=
  
  # The directory to check for the config files (pillars, states) you want to use.
  # There are a few examples under 'config_examples'.
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh

index eefd0572aa067690631cd8b6c303f025fc44594c..3b7d722755648d6ce6e1c2185c1676133b40d230 100755 (executable)
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -201,8 +201,8 @@ apply_var_substitutions() {
         s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
         s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
         s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
-       s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS:-}#g;
-       s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS:-64}#g;
+       s#__CONTROLLER_MAX_WORKERS__#${CONTROLLER_MAX_WORKERS:-}#g;
+       s#__CONTROLLER_MAX_QUEUED_REQUESTS__#${CONTROLLER_MAX_QUEUED_REQUESTS:-128}#g;
         s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
         s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
         s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g;
author	Peter Amstutz <peter.amstutz@curii.com>
	Thu, 3 Aug 2023 18:26:51 +0000 (14:26 -0400)
committer	Peter Amstutz <peter.amstutz@curii.com>
	Fri, 4 Aug 2023 15:21:35 +0000 (11:21 -0400)
lib/config/config.default.yml		patch \| blob \| history
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls		patch \| blob \| history
tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls		patch \| blob \| history
tools/salt-install/local.params.example.multiple_hosts		patch \| blob \| history
tools/salt-install/provision.sh		patch \| blob \| history