[arvados] created: 2.6.0-422-ge6fd711d2

Thu Aug 3 18:37:29 UTC 2023

at  e6fd711d2ddd776bc6a9171a37eab13a1a2e4584 (commit)


commit e6fd711d2ddd776bc6a9171a37eab13a1a2e4584
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Aug 3 14:26:51 2023 -0400

    20680: Rework worker settings for new controller behavior
    
    We now have separate values for concurrent requests and queued
    requests.  Ensure the arvados, passenger and nginx configurations align
    with the correct values.
    
    Renamed CONTROLLER_NGINX_WORKERS to CONTROLLER_MAX_WORKERS
    
    Renamed CONTROLLER_MAX_CONCURRENT_REQUESTS to
    CONTROLLER_MAX_QUEUED_REQUESTS
    
    Adjusted config.yml defaults to reflect that MaxConcurrentRequests
    means something different.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 723e64cea..b78116255 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -225,7 +225,7 @@ Clusters:
 
       # Maximum number of concurrent requests to process concurrently
       # in a single service process, or 0 for no limit.
-      MaxConcurrentRequests: 64
+      MaxConcurrentRequests: 4
 
       # Maximum number of incoming requests to hold in a priority
       # queue waiting for one of the MaxConcurrentRequests slots to be
@@ -234,7 +234,7 @@ Clusters:
       #
       # If MaxQueuedRequests is 0, respond 503 immediately to
       # additional requests while at the MaxConcurrentRequests limit.
-      MaxQueuedRequests: 64
+      MaxQueuedRequests: 128
 
       # Maximum time a "lock container" request is allowed to wait in
       # the incoming request queue before returning 503.
@@ -1074,7 +1074,7 @@ Clusters:
 
       # Number of times a container can be unlocked before being
       # automatically cancelled.
-      MaxDispatchAttempts: 5
+      MaxDispatchAttempts: 10
 
       # Default value for container_count_max for container requests.  This is the
       # number of times Arvados will create a new container to satisfy a container
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index 98fcf5f6d..f8ee95836 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -108,11 +108,9 @@ arvados:
             Password: __INITIAL_USER_PASSWORD__
 
     ### API
-    {%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
-    {%- if max_reqs != "" and max_reqs is number %}
     API:
-      MaxConcurrentRequests: max_reqs
-    {%- endif %}
+      MaxConcurrentRequests: {{ ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int * 2 }}
+      MaxQueuedRequests: __CONTROLLER_MAX_QUEUED_REQUESTS__
 
     ### CONTAINERS
     {%- set dispatcher_ssh_privkey = "__DISPATCHER_SSH_PRIVKEY__" %}
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
index b00317233..4ba2f2fc9 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_passenger.sls
@@ -12,7 +12,8 @@
 {%- set passenger_ruby = '/usr/local/rvm/wrappers/default/ruby'
                            if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04', 'Debian-10') else
                          '/usr/bin/ruby' %}
-{%- set max_reqs = "__CONTROLLER_MAX_CONCURRENT_REQUESTS__" %}
+{%- set max_workers = ("__CONTROLLER_MAX_WORKERS__" or grains['num_cpus'])|int %}
+{%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 1024)|int %}
 
 ### NGINX
 nginx:
@@ -22,12 +23,14 @@ nginx:
   ### PASSENGER
   passenger:
     passenger_ruby: {{ passenger_ruby }}
-    passenger_max_pool_size: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-    {%- if max_reqs != "" %}
-    # Default is 100 -- Configuring this a bit higher than API.MaxConcurrentRequests
-    # to be able to handle /metrics requests even on heavy load situations.
-    passenger_max_request_queue_size: {{ (max_reqs|int * 1.1)|round|int }}
-    {%- endif %}
+    passenger_max_pool_size: {{ max_workers }}
+
+    # Make the passenger queue small (twice the concurrency, so
+    # there's at most one pending request for each busy worker) because
+    # controller reorders requests based on priority, and won't send more than
+    # API.MaxConcurrentRequests to passenger, so things that are moved
+    # to the head of the line get processed quickly.
+    passenger_max_request_queue_size: {{ max_workers|int * 2 + 1 }}
 
   ### SERVER
   server:
@@ -43,16 +46,15 @@ nginx:
       # include: 'modules-enabled/*.conf'
       load_module: {{ passenger_mod }}
       {% endif %}
-      worker_processes: {{ "__CONTROLLER_NGINX_WORKERS__" or grains['num_cpus'] }}
-      {%- if max_reqs != "" %}
-      worker_rlimit_nofile: {{ (max_reqs|int * 3)|round|int }}
-      events:
-        worker_connections: {{ (max_reqs|int * 3)|round|int }}
-      {%- else %}
-      worker_rlimit_nofile: 4096
+      worker_processes: {{ max_workers }}
+
+      # each request is up to 3 connections (1 with client, 1 proxy to
+      # controller, then potentially 1 from controller back to
+      # passenger).  Each connection consumes a file descriptor.
+      # That's how we get these calculations
+      worker_rlimit_nofile: {{ max_reqs * 3 + 1 }}
       events:
-        worker_connections: 1024
-      {%- endif %}
+        worker_connections: {{ max_reqs * 3 + 1 }}
 
   ### SNIPPETS
   snippets:
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index d1cdfeb3c..56e289958 100644
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -122,8 +122,8 @@ DATABASE_INT_IP=10.1.1.11
 SHELL_INT_IP=10.1.2.17
 
 # Performance tuning parameters
-#CONTROLLER_NGINX_WORKERS=
-#CONTROLLER_MAX_CONCURRENT_REQUESTS=
+#CONTROLLER_MAX_WORKERS=
+#CONTROLLER_MAX_QUEUED_REQUESTS=
 
 # The directory to check for the config files (pillars, states) you want to use.
 # There are a few examples under 'config_examples'.
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 7010b388b..5019f9976 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -456,8 +456,8 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
        s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
        s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
        s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
-       s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS}#g;
-       s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS}#g;
+       s#__CONTROLLER_MAX_WORKERS__#${CONTROLLER_MAX_WORKERS}#g;
+       s#__CONTROLLER_MAX_QUEUED_REQUESTS__#${CONTROLLER_MAX_QUEUED_REQUESTS}#g;
        s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
        s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
        s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g;
@@ -536,8 +536,8 @@ if [ -d "${SOURCE_STATES_DIR}" ]; then
          s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
          s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
          s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
-         s#__CONTROLLER_NGINX_WORKERS__#${CONTROLLER_NGINX_WORKERS}#g;
-         s#__CONTROLLER_MAX_CONCURRENT_REQUESTS__#${CONTROLLER_MAX_CONCURRENT_REQUESTS}#g;
+         s#__CONTROLLER_MAX_WORKERS__#${CONTROLLER_MAX_WORKERS}#g;
+         s#__CONTROLLER_MAX_QUEUED_REQUESTS__#${CONTROLLER_MAX_QUEUED_REQUESTS}#g;
          s#__MONITORING_USERNAME__#${MONITORING_USERNAME}#g;
          s#__MONITORING_EMAIL__#${MONITORING_EMAIL}#g;
          s#__MONITORING_PASSWORD__#${MONITORING_PASSWORD}#g;

-----------------------------------------------------------------------


hooks/post-receive
--