[arvados] created: 2.6.0-362-gd32c7034a

Fri Aug 4 15:22:00 UTC 2023

at  d32c7034ae35872f7dda683b46bdddff17d8b2cd (commit)


commit d32c7034ae35872f7dda683b46bdddff17d8b2cd
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Aug 3 14:26:51 2023 -0400

    20680: Rework worker settings for new controller behavior
    
    We now have separate values for concurrent requests and queued
    requests.  Ensure the arvados, passenger and nginx configurations align
    with the correct values.
    
    Renamed CONTROLLER_NGINX_WORKERS to CONTROLLER_MAX_WORKERS
    
    Renamed CONTROLLER_MAX_CONCURRENT_REQUESTS to
    CONTROLLER_MAX_QUEUED_REQUESTS
    
    Adjusted config.default.yml defaults to reflect that
    MaxConcurrentRequests now has a different meaning.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 49d62e298..1df8f1310 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,7 +225,7 @@ Clusters:
 
       # Maximum number of concurrent requests to process concurrently
       # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 64
+      MaxConcurrentRequests: 4
 
       # Maximum number of incoming requests to hold in a priority
       # queue waiting for one of the MaxConcurrentRequests slots to be
@@ -234,7 +234,7 @@ Clusters:
       #
       # If MaxQueuedRequests is 0, respond 503 immediately to
       # additional requests while at the MaxConcurrentRequests limit.
-      MaxQueuedRequests: 64
+      MaxQueuedRequests: 128
 
       # Maximum time a "lock container" request is allowed to wait in
       # the incoming request queue before returning 503.
@@ -1074,7 +1074,7 @@ Clusters:
 
       # Number of times a container can be unlocked before being
       # automatically cancelled.
-      MaxDispatchAttempts: 5
+      MaxDispatchAttempts: 10
 
       # Default value for container_count_max for container requests.  This is the
       # number of times Arvados will create a new container to satisfy a container
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index fadf40986..58a7851c2 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -3,7 +3,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-{%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+{%- set max_workers = ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int %}
+{%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 128)|int %}
 
 # The variables commented out are the default values that the formula uses.
 # The uncommented values are REQUIRED values. If you don't set them, running
@@ -110,10 +111,9 @@ arvados:
             Password: __INITIAL_USER_PASSWORD__
 
     ### API
-    {%- if max_reqs != "" %}
     API:
-      MaxConcurrentRequests: {{ max_reqs|int }}
-    {%- endif %}
+      MaxConcurrentRequests: {{ max_workers * 2 }}
+      MaxQueuedRequests: {{ max_reqs }}
 
     ### CONTAINERS
     {%- set dispatcher_ssh_privkey = "__DISPATCHER_SSH_PRIVKEY__" %}
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
index b00317233..47c79afc4 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -12,7 +12,8 @@
 {%- set passenger_ruby = '/usr/local/rvm/wrappers/default/ruby'
                            if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04', 'Debian-10') else
                          '/usr/bin/ruby' %}
-{%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+{%- set max_workers = ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int %}
+{%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 1024)|int %}
 
 ### NGINX
 nginx:
@@ -22,12 +23,15 @@ nginx:
   ### PASSENGER
   passenger:
     passenger_ruby: {{ passenger_ruby }}
-    passenger_max_pool_size: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-    {%- if max_reqs != "" %}
-    # Default is 100 -- Configuring this a bit higher than API.MaxConcurrentRequests
-    # to be able to handle /metrics requests even on heavy load situations.
-    passenger_max_request_queue_size: {{ (max_reqs|int * 1.1)|round|int }}
-    {%- endif %}
+    passenger_max_pool_size: {{ max_workers }}
+
+    # Make the passenger queue small (twice the concurrency, so
+    # there's at most one pending request for each busy worker)
+    # because controller reorders requests based on priority, and
+    # won't send more than API.MaxConcurrentRequests to passenger
+    # (which is max_workers * 2), so things that are moved to the head
+    # of the line get processed quickly.
+    passenger_max_request_queue_size: {{ max_workers * 2 + 1 }}
 
   ### SERVER
   server:
@@ -43,16 +47,15 @@ nginx:
       # include: 'modules-enabled/*.conf'
       load_module: {{ passenger_mod }}
       {% endif %}
-      worker_processes: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-      {%- if max_reqs != "" %}
-      worker_rlimit_nofile: {{ (max_reqs|int * 3)|round|int }}
-      events:
-        worker_connections: {{ (max_reqs|int * 3)|round|int }}
-      {%- else %}
-      worker_rlimit_nofile: 4096
+      worker_processes: {{ max_workers }}
+
+      # each request is up to 3 connections (1 with client, 1 proxy to
+      # controller, then potentially 1 from controller back to
+      # passenger).  Each connection consumes a file descriptor.
+      # That's how we get these calculations
+      worker_rlimit_nofile: {{ max_reqs * 3 + 1 }}
       events:
-        worker_connections: 1024
-      {%- endif %}
+        worker_connections: {{ max_reqs * 3 + 1 }}
 
   ### SNIPPETS
   snippets:
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index 2c3d3c616..12da2e3ed 100644
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -140,8 +140,8 @@ SHELL_INT_IP=10.1.2.17
 DISABLED_CONTROLLER=""
 
 # Performance tuning parameters
-#CONTROLLER_NGINX_WORKERS=
-CONTROLLER_MAX_CONCURRENT_REQUESTS=64
+#CONTROLLER_MAX_WORKERS=
+#CONTROLLER_MAX_QUEUED_REQUESTS=
 
 # The directory to check for the config files (pillars, states) you want to use.
 # There are a few examples under 'config_examples'.
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index eefd0572a..3b7d72275 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -201,8 +201,8 @@ apply_var_substitutions() {
        s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
        s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
        s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
-       s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS:-}#g;
-       s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS:-64}#g;
+       s#__CONTROLLER_MAX_WORKERS__#${CONTROLLER_MAX_WORKERS:-}#g;
+       s#__CONTROLLER_MAX_QUEUED_REQUESTS__#${CONTROLLER_MAX_QUEUED_REQUESTS:-128}#g;
        s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
        s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
        s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g;

-----------------------------------------------------------------------


hooks/post-receive
--