[arvados] created: 2.5.0-182-ga8ceae657

git repository hosting git at public.arvados.org
Fri Mar 3 19:36:37 UTC 2023


        at  a8ceae65766f78e6f4f8f9f0a10cbd7e398f96b0 (commit)


commit a8ceae65766f78e6f4f8f9f0a10cbd7e398f96b0
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Fri Mar 3 16:24:02 2023 -0300

    16379: Adds basic auth to the prometheus UI.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
index dbb069cdb..d654d6ed0 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
@@ -58,5 +58,7 @@ nginx:
             {%- if ssl_key_encrypted_pillar.ssl_key_encrypted.enabled %}
             - ssl_password_file: {{ '/run/arvados/' | path_join(ssl_key_encrypted_pillar.ssl_key_encrypted.privkey_password_filename) }}
             {%- endif %}
+            - auth_basic: '"Restricted Area"'
+            - auth_basic_user_file: htpasswd
             - access_log: /var/log/nginx/prometheus.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/prometheus.__CLUSTER__.__DOMAIN__.error.log
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/nginx_prometheus_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/states/nginx_prometheus_configuration.sls
new file mode 100644
index 000000000..f7eaab1b7
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/states/nginx_prometheus_configuration.sls
@@ -0,0 +1,21 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+{%- if salt['pillar.get']('nginx:servers:managed:prometheus-ssl') %}
+
+extra_nginx_prometheus_conf_user___PROMETHEUS_UI_USERNAME__:
+  webutil.user_exists:
+    - name: __PROMETHEUS_UI_USERNAME__
+    - password: {{ "__PROMETHEUS_UI_PASSWORD__" | yaml_dquote }}
+    - htpasswd_file: /etc/nginx/htpasswd
+    - options: d
+    - force: true
+    - require:
+      - pkg: extra_nginx_prometheus_conf_pkgs
+
+extra_nginx_prometheus_conf_pkgs:
+  pkg.installed:
+    - name: apache2-utils
+
+{%- endif %}
\ No newline at end of file
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index b6026affb..d7d009958 100644
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -120,6 +120,10 @@ SSL_KEY_ENCRYPTED="no"
 SSL_KEY_AWS_SECRET_NAME="${CLUSTER}-arvados-ssl-privkey-password"
 SSL_KEY_AWS_REGION="us-east-1"
 
+# Customize Prometheus web UI access credentials
+PROMETHEUS_UI_USERNAME=${INITIAL_USER}
+PROMETHEUS_UI_PASSWORD=${INITIAL_USER_PASSWORD}
+
 # The directory to check for the config files (pillars, states) you want to use.
 # There are a few examples under 'config_examples'.
 # CONFIG_DIR="local_config_dir"
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 61f654e61..738ac7668 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -445,7 +445,9 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
        s#__WORKBENCH_SECRET_KEY__#${WORKBENCH_SECRET_KEY}#g;
        s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
        s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
-       s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g" \
+       s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
+       s#__PROMETHEUS_UI_USERNAME__#${PROMETHEUS_UI_USERNAME}#g;
+       s#__PROMETHEUS_UI_PASSWORD__#${PROMETHEUS_UI_PASSWORD}#g" \
   "${f}" > "${P_DIR}"/$(basename "${f}")
 done
 
@@ -520,7 +522,9 @@ if [ -d "${SOURCE_STATES_DIR}" ]; then
          s#__WORKBENCH_SECRET_KEY__#${WORKBENCH_SECRET_KEY}#g;
          s#__SSL_KEY_ENCRYPTED__#${SSL_KEY_ENCRYPTED}#g;
          s#__SSL_KEY_AWS_REGION__#${SSL_KEY_AWS_REGION}#g;
-         s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g" \
+         s#__SSL_KEY_AWS_SECRET_NAME__#${SSL_KEY_AWS_SECRET_NAME}#g;
+         s#__PROMETHEUS_UI_USERNAME__#${PROMETHEUS_UI_USERNAME}#g;
+         s#__PROMETHEUS_UI_PASSWORD__#${PROMETHEUS_UI_PASSWORD}#g" \
     "${f}" > "${F_DIR}/extra/extra"/$(basename "${f}")
   done
 fi
@@ -709,16 +713,16 @@ else
       "monitoring")
         ### States ###
         grep -q "nginx" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
+        grep -q "extra.nginx_prometheus_configuration" ${S_DIR}/top.sls || echo "    - extra.nginx_prometheus_configuration" >> ${S_DIR}/top.sls
         if [ "${SSL_MODE}" = "lets-encrypt" ]; then
           grep -q "letsencrypt"     ${S_DIR}/top.sls || echo "    - letsencrypt" >> ${S_DIR}/top.sls
           if [ "x${USE_LETSENCRYPT_ROUTE53}" = "xyes" ]; then
             grep -q "aws_credentials" ${S_DIR}/top.sls || echo "    - aws_credentials" >> ${S_DIR}/top.sls
           fi
         elif [ "${SSL_MODE}" = "bring-your-own" ]; then
-          copy_custom_cert ${CUSTOM_CERTS_DIR} ${R}
-          if [ "${SSL_KEY_ENCRYPTED}" = "yes" ]; then
-            grep -q "ssl_key_encrypted" ${S_DIR}/top.sls || echo "    - extra.ssl_key_encrypted" >> ${S_DIR}/top.sls
-          fi
+          for SVC in prometheus; do
+            copy_custom_cert ${CUSTOM_CERTS_DIR} ${SVC}
+          done
         fi
         ### Pillars ###
         grep -q "prometheus_server" ${P_DIR}/top.sls || echo "    - prometheus_server" >> ${P_DIR}/top.sls

commit 65a82818e281ba413f5cd37d034921b8bdc02792
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Feb 28 15:54:55 2023 -0300

    16379: Moves prometheus to its own subdomain instead of 'mon.prefix.domain.tld'
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_prometheus_configuration.sls
similarity index 64%
rename from tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls
rename to tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_prometheus_configuration.sls
index 91dbd84e5..7b1165d6d 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_prometheus_configuration.sls
@@ -6,5 +6,5 @@
 ### LETSENCRYPT
 letsencrypt:
   domainsets:
-    monitoring.__CLUSTER__.__DOMAIN__:
-      - mon.__CLUSTER__.__DOMAIN__
+    prometheus.__CLUSTER__.__DOMAIN__:
+      - prometheus.__CLUSTER__.__DOMAIN__
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
similarity index 85%
rename from tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls
rename to tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
index 2f1532457..dbb069cdb 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_prometheus_configuration.sls
@@ -24,7 +24,7 @@ nginx:
         overwrite: true
         config:
           - server:
-            - server_name: mon.__CLUSTER__.__DOMAIN__
+            - server_name: prometheus.__CLUSTER__.__DOMAIN__
             - listen:
               - 80
             - location /.well-known:
@@ -39,7 +39,7 @@ nginx:
           __CERT_REQUIRES__
         config:
           - server:
-            - server_name: mon.__CLUSTER__.__DOMAIN__
+            - server_name: prometheus.__CLUSTER__.__DOMAIN__
             - listen:
               - 443 http2 ssl
             - index: index.html index.htm
@@ -58,5 +58,5 @@ nginx:
             {%- if ssl_key_encrypted_pillar.ssl_key_encrypted.enabled %}
             - ssl_password_file: {{ '/run/arvados/' | path_join(ssl_key_encrypted_pillar.ssl_key_encrypted.privkey_password_filename) }}
             {%- endif %}
-            - access_log: /var/log/nginx/mon.__CLUSTER__.__DOMAIN__.access.log combined
-            - error_log: /var/log/nginx/mon.__CLUSTER__.__DOMAIN__.error.log
+            - access_log: /var/log/nginx/prometheus.__CLUSTER__.__DOMAIN__.access.log combined
+            - error_log: /var/log/nginx/prometheus.__CLUSTER__.__DOMAIN__.error.log
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index 939aa2abb..b6026affb 100644
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -147,4 +147,4 @@ RELEASE="production"
 # DOCKER_TAG="v2.4.2"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
-# PROMETHEUS_TAG="v5.6.5"
\ No newline at end of file
+# PROMETHEUS_TAG="v5.6.5"
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 5b52a50bc..61f654e61 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -721,25 +721,31 @@ else
           fi
         fi
         ### Pillars ###
-        grep -q "nginx_${R}_configuration" ${P_DIR}/top.sls || echo "    - nginx_${R}_configuration" >> ${P_DIR}/top.sls
         grep -q "prometheus_server" ${P_DIR}/top.sls || echo "    - prometheus_server" >> ${P_DIR}/top.sls
+        for SVC in prometheus; do
+          grep -q "nginx_${SVC}_configuration" ${P_DIR}/top.sls || echo "    - nginx_${SVC}_configuration" >> ${P_DIR}/top.sls
+        done
         if [ "${SSL_MODE}" = "lets-encrypt" ]; then
           grep -q "letsencrypt"     ${P_DIR}/top.sls || echo "    - letsencrypt" >> ${P_DIR}/top.sls
-          grep -q "letsencrypt_${R}_configuration" ${P_DIR}/top.sls || echo "    - letsencrypt_${R}_configuration" >> ${P_DIR}/top.sls
+          for SVC in prometheus; do
+            grep -q "letsencrypt_${SVC}_configuration" ${P_DIR}/top.sls || echo "    - letsencrypt_${SVC}_configuration" >> ${P_DIR}/top.sls
+            sed -i "s/__CERT_REQUIRES__/cmd: create-initial-cert-${SVC}.${CLUSTER}.${DOMAIN}*/g;
+                    s#__CERT_PEM__#/etc/letsencrypt/live/${SVC}.${CLUSTER}.${DOMAIN}/fullchain.pem#g;
+                    s#__CERT_KEY__#/etc/letsencrypt/live/${SVC}.${CLUSTER}.${DOMAIN}/privkey.pem#g" \
+            ${P_DIR}/nginx_${SVC}_configuration.sls
+          done
           if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then
             grep -q "aws_credentials" ${P_DIR}/top.sls || echo "    - aws_credentials" >> ${P_DIR}/top.sls
           fi
-          sed -i "s/__CERT_REQUIRES__/cmd: create-initial-cert-${R}.${CLUSTER}.${DOMAIN}*/g;
-                  s#__CERT_PEM__#/etc/letsencrypt/live/${R}.${CLUSTER}.${DOMAIN}/fullchain.pem#g;
-                  s#__CERT_KEY__#/etc/letsencrypt/live/${R}.${CLUSTER}.${DOMAIN}/privkey.pem#g" \
-          ${P_DIR}/nginx_${R}_configuration.sls
         elif [ "${SSL_MODE}" = "bring-your-own" ]; then
           grep -q "ssl_key_encrypted" ${P_DIR}/top.sls || echo "    - ssl_key_encrypted" >> ${P_DIR}/top.sls
-          sed -i "s/__CERT_REQUIRES__/file: extra_custom_certs_file_copy_arvados-${R}.pem/g;
-                  s#__CERT_PEM__#/etc/nginx/ssl/arvados-${R}.pem#g;
-                  s#__CERT_KEY__#/etc/nginx/ssl/arvados-${R}.key#g" \
-            ${P_DIR}/nginx_${R}_configuration.sls
-          grep -q ${R} ${P_DIR}/extra_custom_certs.sls || echo "  - ${R}" >> ${P_DIR}/extra_custom_certs.sls
+          for SVC in prometheus; do
+            sed -i "s/__CERT_REQUIRES__/file: extra_custom_certs_file_copy_arvados-${SVC}.pem/g;
+                    s#__CERT_PEM__#/etc/nginx/ssl/arvados-${SVC}.pem#g;
+                    s#__CERT_KEY__#/etc/nginx/ssl/arvados-${SVC}.key#g" \
+              ${P_DIR}/nginx_${SVC}_configuration.sls
+            grep -q ${SVC} ${P_DIR}/extra_custom_certs.sls || echo "  - ${SVC}" >> ${P_DIR}/extra_custom_certs.sls
+          done
         fi
       ;;
       "api")
diff --git a/tools/salt-install/terraform/aws/vpc/locals.tf b/tools/salt-install/terraform/aws/vpc/locals.tf
index 663f0a5e4..fa8b96771 100644
--- a/tools/salt-install/terraform/aws/vpc/locals.tf
+++ b/tools/salt-install/terraform/aws/vpc/locals.tf
@@ -22,7 +22,7 @@ locals {
   }
   aliases = {
     controller: ["ws"]
-    workbench: ["mon", "workbench2", "webshell"]
+    workbench: ["prometheus", "workbench2", "webshell"]
     keepproxy: ["keep", "download", "*.collections"]
   }
   cname_by_host = flatten([

commit 3146ea1ff77007e2264786655dfdd0c0f4de25ac
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Mon Feb 27 13:29:06 2023 -0300

    16379: Adds prometheus' node_exporter to all nodes.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
index 2eed52a1d..10cbb6c34 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
@@ -24,19 +24,12 @@ postgres:
       password: "__DATABASE_PASSWORD__"
     prometheus:
       ensure: present
-
-  # tablespaces:
-  #   arvados_tablespace:
-  #     directory: /path/to/some/tbspace/arvados_tbsp
-  #     owner: arvados
-
   databases:
     __CLUSTER___arvados:
       owner: __CLUSTER___arvados
       template: template0
       lc_ctype: en_US.utf8
       lc_collate: en_US.utf8
-      # tablespace: arvados_tablespace
       schemas:
         public:
           owner: __CLUSTER___arvados
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_node_exporter.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_node_exporter.sls
new file mode 100644
index 000000000..74a56645b
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_node_exporter.sls
@@ -0,0 +1,17 @@
+---
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+### PROMETHEUS
+prometheus:
+  wanted:
+    component:
+      - node_exporter
+  pkg:
+    use_upstream_repo: true
+    component:
+      node_exporter:
+        service:
+          args:
+            collector.textfile.directory: /var/lib/prometheus/node-exporter
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls
index 73f706d16..62f654e52 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls
@@ -1,9 +1,14 @@
+---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
+prometheus_pg_exporter:
+  enabled: true
+
 ### PROMETHEUS
 prometheus:
   wanted:
     component:
       - postgres_exporter
+      - node_exporter
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls
index dd5594dd8..3988f3c12 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls
@@ -9,11 +9,9 @@ prometheus:
     component:
       - prometheus
       - alertmanager
-      - blackbox_exporter
+      - node_exporter
   pkg:
     use_upstream_repo: true
-    use_upstream_archive: true
-
     component:
       prometheus:
         config:
@@ -78,3 +76,20 @@ prometheus:
                   labels:
                     instance: database.__CLUSTER__
                     cluster: __CLUSTER__
+
+            # Nodes
+            - job_name: node
+              static_configs:
+                {% for node in [
+                  'controller',
+                  'keep0',
+                  'keep1',
+                  'keep',
+                  'workbench',
+                  'shell',
+                ] %}
+                - targets: [ "{{ node }}.__CLUSTER__.__DOMAIN__:9100" ]
+                  labels:
+                    instance: "{{ node }}.__CLUSTER__"
+                    cluster: __CLUSTER__
+                {% endfor %}
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls b/tools/salt-install/config_examples/multi_host/aws/states/prometheus_pg_exporter.sls
similarity index 91%
rename from tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls
rename to tools/salt-install/config_examples/multi_host/aws/states/prometheus_pg_exporter.sls
index 6af01bbc2..dee2099fb 100644
--- a/tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/states/prometheus_pg_exporter.sls
@@ -2,6 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+{%- set prometheus_pg_exporter = pillar.get('prometheus_pg_exporter', {'enabled': False}) %}
+
+{%- if prometheus_pg_exporter.enabled %}
 ### PACKAGES
 monitoring_required_pkgs:
   pkg.installed:
@@ -74,5 +77,6 @@ mtail_service:
     - require:
       - pkg: monitoring_required_pkgs
     - watch:
-      - file: /etc/mtail/postgresql.mtail
-      - file: /etc/default/mtail
+      - file: mtail_postgresql_conf
+      - file: mtail_etc_default
+{%- endif %}
\ No newline at end of file
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 042825280..5b52a50bc 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -481,6 +481,7 @@ fi
 # Replace helper state files that differ from the formula's examples
 if [ -d "${SOURCE_STATES_DIR}" ]; then
   mkdir -p "${F_DIR}"/extra/extra
+  rm -f "${F_DIR}"/extra/extra/*
 
   for f in $(ls "${SOURCE_STATES_DIR}"/*); do
     sed "s#__ANONYMOUS_USER_TOKEN__#${ANONYMOUS_USER_TOKEN}#g;
@@ -690,13 +691,17 @@ else
     grep -q "extra_custom_certs" ${P_DIR}/top.sls || echo "    - extra_custom_certs" >> ${P_DIR}/top.sls
   fi
 
+  # Prometheus state on all nodes due to the node exporter below
+  grep -q "\- prometheus$" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls
+  # Prometheus node exporter pillar
+  grep -q "prometheus_node_exporter" ${P_DIR}/top.sls || echo "    - prometheus_node_exporter" >> ${P_DIR}/top.sls
+
   for R in ${ROLES}; do
     case "${R}" in
       "database")
         # States
         grep -q "\- postgres$" ${S_DIR}/top.sls || echo "    - postgres" >> ${S_DIR}/top.sls
-        grep -q "prometheus" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls
-        grep -q "extra.postgresql_mtail" ${S_DIR}/top.sls || echo "    - extra.postgresql_mtail" >> ${S_DIR}/top.sls
+        grep -q "extra.prometheus_pg_exporter" ${S_DIR}/top.sls || echo "    - extra.prometheus_pg_exporter" >> ${S_DIR}/top.sls
         # Pillars
         grep -q "postgresql" ${P_DIR}/top.sls || echo "    - postgresql" >> ${P_DIR}/top.sls
         grep -q "prometheus_pg_exporter" ${P_DIR}/top.sls || echo "    - prometheus_pg_exporter" >> ${P_DIR}/top.sls
@@ -704,7 +709,6 @@ else
       "monitoring")
         ### States ###
         grep -q "nginx" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
-        grep -q "prometheus" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls
         if [ "${SSL_MODE}" = "lets-encrypt" ]; then
           grep -q "letsencrypt"     ${S_DIR}/top.sls || echo "    - letsencrypt" >> ${S_DIR}/top.sls
           if [ "x${USE_LETSENCRYPT_ROUTE53}" = "xyes" ]; then

commit 5e542acae5f5621e48fa45a75911a767f0bd3728
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Feb 21 18:21:58 2023 -0300

    16379: Adds prometheus service for monitoring arvados & postgresql.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index 25f68ca04..28ae42f6c 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
@@ -161,7 +161,7 @@ arvados:
           'http://__CONTROLLER_INT_IP__:9006': {}
       Keepbalance:
         InternalURLs:
-          'http://localhost:9005': {}
+          'http://__CONTROLLER_INT_IP__:9005': {}
       Keepproxy:
         ExternalURL: 'https://keep.__CLUSTER__.__DOMAIN__:__KEEP_EXT_SSL_PORT__'
         InternalURLs:
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls
new file mode 100644
index 000000000..91dbd84e5
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/letsencrypt_monitoring_configuration.sls
@@ -0,0 +1,10 @@
+---
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+### LETSENCRYPT
+letsencrypt:
+  domainsets:
+    monitoring.__CLUSTER__.__DOMAIN__:
+      - mon.__CLUSTER__.__DOMAIN__
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls
new file mode 100644
index 000000000..2f1532457
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/nginx_monitoring_configuration.sls
@@ -0,0 +1,62 @@
+---
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+{%- import_yaml "ssl_key_encrypted.sls" as ssl_key_encrypted_pillar %}
+
+### NGINX
+nginx:
+  ### SERVER
+  server:
+    config:
+      ### STREAMS
+      http:
+        upstream prometheus_upstream:
+          - server: '127.0.0.1:9090 fail_timeout=10s'
+
+  ### SITES
+  servers:
+    managed:
+      ### PROMETHEUS
+      prometheus:
+        enabled: true
+        overwrite: true
+        config:
+          - server:
+            - server_name: mon.__CLUSTER__.__DOMAIN__
+            - listen:
+              - 80
+            - location /.well-known:
+              - root: /var/www
+            - location /:
+              - return: '301 https://$host$request_uri'
+
+      prometheus-ssl:
+        enabled: true
+        overwrite: true
+        requires:
+          __CERT_REQUIRES__
+        config:
+          - server:
+            - server_name: mon.__CLUSTER__.__DOMAIN__
+            - listen:
+              - 443 http2 ssl
+            - index: index.html index.htm
+            - location /:
+              - proxy_pass: 'http://prometheus_upstream'
+              - proxy_read_timeout: 300
+              - proxy_connect_timeout: 90
+              - proxy_redirect: 'off'
+              - proxy_set_header: X-Forwarded-Proto https
+              - proxy_set_header: 'Host $http_host'
+              - proxy_set_header: 'X-Real-IP $remote_addr'
+              - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
+            - ssl_certificate: __CERT_PEM__
+            - ssl_certificate_key: __CERT_KEY__
+            - include: snippets/ssl_hardening_default.conf
+            {%- if ssl_key_encrypted_pillar.ssl_key_encrypted.enabled %}
+            - ssl_password_file: {{ '/run/arvados/' | path_join(ssl_key_encrypted_pillar.ssl_key_encrypted.privkey_password_filename) }}
+            {%- endif %}
+            - access_log: /var/log/nginx/mon.__CLUSTER__.__DOMAIN__.access.log combined
+            - error_log: /var/log/nginx/mon.__CLUSTER__.__DOMAIN__.error.log
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
index d6320da24..2eed52a1d 100644
--- a/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
@@ -5,6 +5,8 @@
 
 ### POSTGRESQL
 postgres:
+  pkgs_extra:
+    - postgresql-contrib
   use_upstream_repo: true
   version: '12'
   postgresconf: |-
@@ -20,6 +22,8 @@ postgres:
     __CLUSTER___arvados:
       ensure: present
       password: "__DATABASE_PASSWORD__"
+    prometheus:
+      ensure: present
 
   # tablespaces:
   #   arvados_tablespace:
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls
new file mode 100644
index 000000000..73f706d16
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_pg_exporter.sls
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+### PROMETHEUS
+prometheus:
+  wanted:
+    component:
+      - postgres_exporter
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls
new file mode 100644
index 000000000..dd5594dd8
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls
@@ -0,0 +1,80 @@
+---
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+### PROMETHEUS
+prometheus:
+  wanted:
+    component:
+      - prometheus
+      - alertmanager
+      - blackbox_exporter
+  pkg:
+    use_upstream_repo: true
+    use_upstream_archive: true
+
+    component:
+      prometheus:
+        config:
+          global:
+            scrape_interval: 15s
+            evaluation_interval: 15s
+          rule_files:
+            - rules.yml
+
+          scrape_configs:
+            - job_name: prometheus
+              # metrics_path defaults to /metrics
+              # scheme defaults to http.
+              static_configs:
+              - targets: ['localhost:9090']
+                labels:
+                  instance: mon.__CLUSTER__
+                  cluster: __CLUSTER__
+
+            ## Arvados unique jobs
+            - job_name: keep_web
+              bearer_token: __MANAGEMENT_TOKEN__
+              scheme: https
+              static_configs:
+                - targets: ['keep.__CLUSTER__.__DOMAIN__:443']
+                  labels:
+                    instance: keep-web.__CLUSTER__
+                    cluster: __CLUSTER__
+            - job_name: keep_balance
+              bearer_token: __MANAGEMENT_TOKEN__
+              static_configs:
+                - targets: ['__CONTROLLER_INT_IP__:9005']
+                  labels:
+                    instance: keep-balance.__CLUSTER__
+                    cluster: __CLUSTER__
+            - job_name: keepstore
+              bearer_token: __MANAGEMENT_TOKEN__
+              static_configs:
+                - targets: ['__KEEPSTORE0_INT_IP__:25107']
+                  labels:
+                    instance: keep0.__CLUSTER__
+                    cluster: __CLUSTER__
+                - targets: ['__KEEPSTORE1_INT_IP__:25107']
+                  labels:
+                    instance: keep1.__CLUSTER__
+                    cluster: __CLUSTER__
+            - job_name: arvados_dispatch_cloud
+              bearer_token: __MANAGEMENT_TOKEN__
+              static_configs:
+                - targets: ['__CONTROLLER_INT_IP__:9006']
+                  labels:
+                    instance: arvados-dispatch-cloud.__CLUSTER__
+                    cluster: __CLUSTER__
+
+            # Database
+            - job_name: postgresql
+              static_configs:
+                - targets: [
+                    '__DATABASE_INT_IP__:9187',
+                    '__DATABASE_INT_IP__:3903'
+                  ]
+                  labels:
+                    instance: database.__CLUSTER__
+                    cluster: __CLUSTER__
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls b/tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls
new file mode 100644
index 000000000..6af01bbc2
--- /dev/null
+++ b/tools/salt-install/config_examples/multi_host/aws/states/postgresql_mtail.sls
@@ -0,0 +1,78 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+### PACKAGES
+monitoring_required_pkgs:
+  pkg.installed:
+    - name: mtail
+
+### FILES
+prometheus_pg_exporter_etc_default:
+  file.managed:
+    - name: /etc/default/prometheus-postgres-exporter
+    - contents: |
+        ### This file managed by Salt, do not edit by hand!!
+        #
+        # For details, check /usr/share/doc/prometheus-postgres-exporter/README.Debian
+        DATA_SOURCE_NAME='user=prometheus host=/run/postgresql dbname=postgres'
+    - require:
+      - pkg: prometheus-package-install-postgres_exporter-installed
+
+mtail_postgresql_conf:
+  file.managed:
+    - name: /etc/mtail/postgresql.mtail
+    - contents: |
+        ########################################################################
+        # File managed by Salt.
+        # Your changes will be overwritten.
+        ########################################################################
+
+        # Parser for postgresql's log statement duration
+
+        gauge postgresql_statement_duration_seconds by statement
+
+        /^/ +
+        /(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} (\w+)) / + # 2019-01-16 16:53:45 GMT
+        /LOG: +duration: / +
+        /(?P<duration>[0-9\.]+) ms/ + # 153.967 ms
+        /(.*?): (?P<statement>.+)/ + # statement: SELECT COUNT(*) FROM (SELECT rolname FROM pg_roles WHERE rolname='arvados') count
+        /$/ {
+          strptime($timestamp, "2006-01-02 15:04:05 MST") # for tests
+
+          postgresql_statement_duration_seconds[$statement] = $duration / 1000
+        }
+    - require:
+      - pkg: monitoring_required_pkgs
+
+mtail_etc_default:
+  file.managed:
+    - name: /etc/default/mtail
+    - contents: |
+        ### This file managed by Salt, do not edit by hand!!
+        #
+        ENABLED=true
+        # List of files to monitor (mandatory).
+        LOGS=/var/log/postgresql/postgresql*log
+    - require:
+      - pkg: monitoring_required_pkgs
+
+### SERVICES
+prometheus_pg_exporter_service:
+  service.running:
+    - name: prometheus-postgres-exporter
+    - enable: true
+    - require:
+      - pkg: prometheus-package-install-postgres_exporter-installed
+    - watch:
+      - file: /etc/default/prometheus-postgres-exporter
+
+mtail_service:
+  service.running:
+    - name: mtail
+    - enable: true
+    - require:
+      - pkg: monitoring_required_pkgs
+    - watch:
+      - file: /etc/mtail/postgresql.mtail
+      - file: /etc/default/mtail
diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
index 21f36faac..7b493865b 100755
--- a/tools/salt-install/installer.sh
+++ b/tools/salt-install/installer.sh
@@ -275,7 +275,7 @@ case "$subcmd" in
 	else
 	    # Just deploy the node that was supplied on the command line.
 	    sync $NODE $BRANCH
-	    deploynode $NODE ""
+	    deploynode $NODE "${NODES[$NODE]}"
 	fi
 
 	set +x
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index 0064a78c5..939aa2abb 100644
--- a/tools/salt-install/local.params.example.multiple_hosts
+++ b/tools/salt-install/local.params.example.multiple_hosts
@@ -24,7 +24,7 @@ NODES=(
   [keep0.${CLUSTER}.${DOMAIN}]=keepstore
   [keep1.${CLUSTER}.${DOMAIN}]=keepstore
   [keep.${CLUSTER}.${DOMAIN}]=keepproxy,keepweb
-  [workbench.${CLUSTER}.${DOMAIN}]=workbench,workbench2,webshell
+  [workbench.${CLUSTER}.${DOMAIN}]=monitoring,workbench,workbench2,webshell
   [shell.${CLUSTER}.${DOMAIN}]=shell
 )
 
@@ -147,3 +147,4 @@ RELEASE="production"
 # DOCKER_TAG="v2.4.2"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
+# PROMETHEUS_TAG="v5.6.5"
\ No newline at end of file
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 86335ff8e..042825280 100755
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -32,6 +32,7 @@ usage() {
   echo >&2 "                                                keepbalance"
   echo >&2 "                                                keepstore"
   echo >&2 "                                                keepweb"
+  echo >&2 "                                                monitoring"
   echo >&2 "                                                shell"
   echo >&2 "                                                webshell"
   echo >&2 "                                                websocket"
@@ -108,7 +109,7 @@ arguments() {
         for i in ${2//,/ }
           do
             # Verify the role exists
-            if [[ ! "database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepbalance,keepproxy,shell,workbench,dispatcher" == *"$i"* ]]; then
+            if [[ ! ",database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepbalance,keepproxy,shell,workbench,dispatcher,monitoring," == *",$i,"* ]]; then # comma-delimit both sides so only whole role names match (a bare substring test would accept e.g. "keep")
               echo "The role '${i}' is not a valid role"
               usage
               exit 1
@@ -220,6 +221,7 @@ DOCKER_TAG="v2.4.2"
 LOCALE_TAG="v0.3.4"
 LETSENCRYPT_TAG="v2.1.0"
 LOGROTATE_TAG="v0.14.0"
+PROMETHEUS_TAG="v5.6.5" # git tag of prometheus-formula that gets checked out after the formula is fetched
 
 # Salt's dir
 DUMP_SALT_CONFIG_DIR=""
@@ -358,6 +360,11 @@ test -d postgres && ( cd postgres && git fetch ) \
   || git clone --quiet ${POSTGRES_URL} ${F_DIR}/postgres
 ( cd postgres && git checkout --quiet tags/"${POSTGRES_TAG}" )
 
+echo "...prometheus" # progress message for the prometheus formula
+test -d prometheus && ( cd prometheus && git fetch ) \
+  || git clone --quiet https://github.com/saltstack-formulas/prometheus-formula.git ${F_DIR}/prometheus # fetch when ./prometheus exists; clone when it does not (or when the fetch fails)
+( cd prometheus && git checkout --quiet tags/"${PROMETHEUS_TAG}" ) # pin the working tree to PROMETHEUS_TAG
+
 echo "...letsencrypt"
 test -d letsencrypt && ( cd letsencrypt && git fetch ) \
   || git clone --quiet https://github.com/saltstack-formulas/letsencrypt-formula.git ${F_DIR}/letsencrypt
@@ -687,9 +694,49 @@ else
     case "${R}" in
       "database")
         # States
-        echo "    - postgres" >> ${S_DIR}/top.sls
+        grep -q "\- postgres$" ${S_DIR}/top.sls || echo "    - postgres" >> ${S_DIR}/top.sls # append only when no line already ends in "- postgres"
+        grep -q "prometheus" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls # any line containing "prometheus" counts as already present
+        grep -q "extra.postgresql_mtail" ${S_DIR}/top.sls || echo "    - extra.postgresql_mtail" >> ${S_DIR}/top.sls # same guarded append for the mtail state
         # Pillars
-        echo '    - postgresql' >> ${P_DIR}/top.sls
+        grep -q "postgresql" ${P_DIR}/top.sls || echo "    - postgresql" >> ${P_DIR}/top.sls # guarded append to the pillar top file
+        grep -q "prometheus_pg_exporter" ${P_DIR}/top.sls || echo "    - prometheus_pg_exporter" >> ${P_DIR}/top.sls # guarded append of the exporter pillar
+      ;;
+      "monitoring") # monitoring role: register its states and pillars in the two top.sls files
+        ### States ### (each "grep -q ... ||" guard skips the append when a matching line is already present)
+        grep -q "nginx" ${S_DIR}/top.sls || echo "    - nginx" >> ${S_DIR}/top.sls
+        grep -q "prometheus" ${S_DIR}/top.sls || echo "    - prometheus" >> ${S_DIR}/top.sls
+        if [ "${SSL_MODE}" = "lets-encrypt" ]; then
+          grep -q "letsencrypt"     ${S_DIR}/top.sls || echo "    - letsencrypt" >> ${S_DIR}/top.sls
+          if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then # same test form as the pillar section below
+            grep -q "aws_credentials" ${S_DIR}/top.sls || echo "    - aws_credentials" >> ${S_DIR}/top.sls
+          fi
+        elif [ "${SSL_MODE}" = "bring-your-own" ]; then
+          copy_custom_cert ${CUSTOM_CERTS_DIR} ${R} # helper defined outside this hunk
+          if [ "${SSL_KEY_ENCRYPTED}" = "yes" ]; then
+            grep -q "ssl_key_encrypted" ${S_DIR}/top.sls || echo "    - extra.ssl_key_encrypted" >> ${S_DIR}/top.sls
+          fi
+        fi
+        ### Pillars ###
+        grep -q "nginx_${R}_configuration" ${P_DIR}/top.sls || echo "    - nginx_${R}_configuration" >> ${P_DIR}/top.sls # NOTE(review): resolves to nginx_monitoring_configuration; confirm a pillar file with that name exists
+        grep -q "prometheus_server" ${P_DIR}/top.sls || echo "    - prometheus_server" >> ${P_DIR}/top.sls
+        if [ "${SSL_MODE}" = "lets-encrypt" ]; then
+          grep -q "letsencrypt"     ${P_DIR}/top.sls || echo "    - letsencrypt" >> ${P_DIR}/top.sls
+          grep -q "letsencrypt_${R}_configuration" ${P_DIR}/top.sls || echo "    - letsencrypt_${R}_configuration" >> ${P_DIR}/top.sls
+          if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then
+            grep -q "aws_credentials" ${P_DIR}/top.sls || echo "    - aws_credentials" >> ${P_DIR}/top.sls
+          fi # the sed below fills the __CERT_*__ placeholders in the nginx pillar with Let's Encrypt paths
+          sed -i "s/__CERT_REQUIRES__/cmd: create-initial-cert-${R}.${CLUSTER}.${DOMAIN}*/g;
+                  s#__CERT_PEM__#/etc/letsencrypt/live/${R}.${CLUSTER}.${DOMAIN}/fullchain.pem#g;
+                  s#__CERT_KEY__#/etc/letsencrypt/live/${R}.${CLUSTER}.${DOMAIN}/privkey.pem#g" \
+            ${P_DIR}/nginx_${R}_configuration.sls
+        elif [ "${SSL_MODE}" = "bring-your-own" ]; then
+          grep -q "ssl_key_encrypted" ${P_DIR}/top.sls || echo "    - ssl_key_encrypted" >> ${P_DIR}/top.sls # the sed below fills the same placeholders with /etc/nginx/ssl paths
+          sed -i "s/__CERT_REQUIRES__/file: extra_custom_certs_file_copy_arvados-${R}.pem/g;
+                  s#__CERT_PEM__#/etc/nginx/ssl/arvados-${R}.pem#g;
+                  s#__CERT_KEY__#/etc/nginx/ssl/arvados-${R}.key#g" \
+            ${P_DIR}/nginx_${R}_configuration.sls
+          grep -q "${R}" ${P_DIR}/extra_custom_certs.sls || echo "  - ${R}" >> ${P_DIR}/extra_custom_certs.sls # list the role in the custom-certs pillar once
+        fi
       ;;
       "api")
         # States
diff --git a/tools/salt-install/terraform/aws/vpc/locals.tf b/tools/salt-install/terraform/aws/vpc/locals.tf
index 8338aec7c..663f0a5e4 100644
--- a/tools/salt-install/terraform/aws/vpc/locals.tf
+++ b/tools/salt-install/terraform/aws/vpc/locals.tf
@@ -22,7 +22,7 @@ locals {
   }
   aliases = {
     controller: ["ws"]
-    workbench: ["workbench2", "webshell"]
+    workbench: ["mon", "workbench2", "webshell"]
     keepproxy: ["keep", "download", "*.collections"]
   }
   cname_by_host = flatten([
diff --git a/tools/salt-install/terraform/aws/vpc/terraform.tfvars b/tools/salt-install/terraform/aws/vpc/terraform.tfvars
index cac62ed6f..210a2d2e7 100644
--- a/tools/salt-install/terraform/aws/vpc/terraform.tfvars
+++ b/tools/salt-install/terraform/aws/vpc/terraform.tfvars
@@ -3,5 +3,5 @@
 # SPDX-License-Identifier: CC-BY-SA-3.0
 
 region_name = "us-east-1"
-# cluster_name = "xarv1"
-# domain_name = "example.com"
+cluster_name = "xarv1"
+domain_name = "example.com"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list