[ARVADOS] updated: 2.1.0-2394-gb971c02eb

Git user git at public.arvados.org
Tue Apr 26 20:17:27 UTC 2022


Summary of changes:
 doc/_includes/_restart_api.liquid                  |  7 +++++
 .../maintenance-and-upgrading.html.textile.liquid  |  4 +++
 doc/install/install-api-server.html.textile.liquid |  3 +-
 sdk/go/health/aggregator.go                        | 35 ++++++++++++++++------
 services/api/config/initializers/reload_config.rb  | 28 +++++++++++++++--
 5 files changed, 64 insertions(+), 13 deletions(-)

       via  b971c02ebd292e5d689b09fed5747ce01147ae77 (commit)
       via  784ef6318884a2e866390e1650185e4e43c875be (commit)
       via  7edeceb6f2a768b37ee09500f8c87ca6a2fb0be6 (commit)
       via  8a5e460875fd427c28f85acbc27c7b92ff803207 (commit)
      from  1a3a8b0a7cc31e9bfb63c6001bc049a831e13dd1 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit b971c02ebd292e5d689b09fed5747ce01147ae77
Author: Tom Clegg <tom at curii.com>
Date:   Tue Apr 26 15:46:16 2022 -0400

    18794: Don't restart if new config is unreadable or unchanged.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/api/config/initializers/reload_config.rb b/services/api/config/initializers/reload_config.rb
index 6a7eac7a5..0698f92ca 100644
--- a/services/api/config/initializers/reload_config.rb
+++ b/services/api/config/initializers/reload_config.rb
@@ -16,12 +16,34 @@ else
       # which could be a long time.
       Rails.logger.debug("reload_config: waiting for lock on #{lockfile}")
       f.flock(File::LOCK_EX)
+
+      t_lastload = Rails.configuration.SourceTimestamp
+      hash_lastload = Rails.configuration.SourceSHA256
       conffile = ENV['ARVADOS_CONFIG'] || "/etc/arvados/config.yml"
-      Rails.logger.info("reload_config: polling for updated mtime on #{conffile} with threshold #{Rails.configuration.SourceTimestamp}")
+      Rails.logger.info("reload_config: polling for updated mtime on #{conffile} with threshold #{t_lastload}")
       while true
         sleep 1
         t = File.mtime(conffile)
-        if t.to_f > Rails.configuration.SourceTimestamp.to_f
+        # If the file is newer than 5s, re-read it even if the
+        # timestamp matches the previously loaded file. This enables
+        # us to detect changes even if the filesystem's timestamp
+        # precision cannot represent multiple updates per second.
+        if t.to_f != t_lastload.to_f || Time.now.to_f - t.to_f < 5
+          Open3.popen2("arvados-server", "config-dump", "-skip-legacy") do |stdin, stdout, status_thread|
+            confs = YAML.load(stdout, deserialize_symbols: false)
+            hash = confs["SourceSHA256"]
+          rescue => e
+            Rails.logger.info("reload_config: config file updated but could not be loaded: #{e}")
+            t_lastload = t
+            continue
+          end
+          if hash == hash_lastload
+            # If we reloaded a new or updated file, but the content is
+            # identical, keep polling instead of restarting.
+            t_lastload = t
+            continue
+          end
+
           restartfile = Rails.root.join('tmp', 'restart.txt')
           touchtime = Time.now
           Rails.logger.info("reload_config: mtime on #{conffile} changed to #{t}, touching #{restartfile} to #{touchtime}")

commit 784ef6318884a2e866390e1650185e4e43c875be
Author: Tom Clegg <tom at curii.com>
Date:   Tue Apr 26 15:32:24 2022 -0400

    18794: Don't start config-check thread in packaging tasks.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/api/config/initializers/reload_config.rb b/services/api/config/initializers/reload_config.rb
index af266cae3..6a7eac7a5 100644
--- a/services/api/config/initializers/reload_config.rb
+++ b/services/api/config/initializers/reload_config.rb
@@ -4,6 +4,8 @@
 
 if !File.owned?(Rails.root.join('tmp'))
   Rails.logger.debug("reload_config: not owner of #{Rails.root}/tmp, skipping")
+elsif ENV["ARVADOS_CONFIG"] == "none"
+  Rails.logger.debug("reload_config: no config in use, skipping")
 else
   Thread.new do
     lockfile = Rails.root.join('tmp', 'reload_config.lock')

commit 7edeceb6f2a768b37ee09500f8c87ca6a2fb0be6
Author: Tom Clegg <tom at curii.com>
Date:   Tue Apr 26 15:14:50 2022 -0400

    18794: Recommend "arvados-server check" after upgrade/config change.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/_includes/_restart_api.liquid b/doc/_includes/_restart_api.liquid
index c3e0330b8..d6c5c3433 100644
--- a/doc/_includes/_restart_api.liquid
+++ b/doc/_includes/_restart_api.liquid
@@ -1,8 +1,15 @@
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
 h2(#restart-api). Restart the API server and controller
 
 *Make sure the cluster config file is up to date on the API server host* then restart the API server and controller processes to ensure the configuration changes are visible to the whole cluster.
 
 <notextile>
 <pre><code># <span class="userinput">systemctl restart nginx arvados-controller</span>
+# <span class="userinput">arvados-server check</span>
 </code></pre>
 </notextile>
diff --git a/doc/admin/maintenance-and-upgrading.html.textile.liquid b/doc/admin/maintenance-and-upgrading.html.textile.liquid
index 0d517fe91..3cc80a356 100644
--- a/doc/admin/maintenance-and-upgrading.html.textile.liquid
+++ b/doc/admin/maintenance-and-upgrading.html.textile.liquid
@@ -50,6 +50,8 @@ h3(#restart). Restart the services affected by the change
 
 If you know which Arvados service uses the specific configuration that was modified, restart those services. When in doubt, restart all Arvados system services.
 
+To check for services that have not restarted since the configuration file was updated, run the @arvados-server check@ command on each system node.
+
 h2(#upgrading). Upgrading Arvados
 
 Upgrading Arvados typically involves the following steps:
@@ -61,4 +63,6 @@ Upgrading Arvados typically involves the following steps:
 # rebuild and deploy the "compute node image":{{site.baseurl}}/install/crunch2-cloud/install-compute-node.html (cloud only)
 # Install new packages using @apt-get upgrade@ or @yum upgrade@.
 # Wait for package installation scripts as they perform any necessary data migrations.
+# Run @arvados-server config-check@ to detect configuration errors or deprecated entries.
 # Verify that the Arvados services were restarted as part of the package upgrades.
+# Run @arvados-server check@ to detect services that did not restart properly.
diff --git a/doc/install/install-api-server.html.textile.liquid b/doc/install/install-api-server.html.textile.liquid
index 7d0353c9e..6c3eabba4 100644
--- a/doc/install/install-api-server.html.textile.liquid
+++ b/doc/install/install-api-server.html.textile.liquid
@@ -211,8 +211,7 @@ Confirm working Rails API server:
 
 Confirm that you can use the system root token to act as the system root user:
 
-<notextile><pre><code>
-$ curl -H "Authorization: Bearer $system_root_token" https://<span class="userinput">ClusterID.example.com</span>/arvados/v1/users/current
+<notextile><pre><code>$ curl -H "Authorization: Bearer $system_root_token" https://<span class="userinput">ClusterID.example.com</span>/arvados/v1/users/current
 </code></pre></notextile>
 
 h3. Troubleshooting

commit 8a5e460875fd427c28f85acbc27c7b92ff803207
Author: Tom Clegg <tom at curii.com>
Date:   Tue Apr 26 15:04:31 2022 -0400

    18794: "check" defaults to succinct human-readable output.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go
index 07fc72e08..5e010d88b 100644
--- a/sdk/go/health/aggregator.go
+++ b/sdk/go/health/aggregator.go
@@ -151,7 +151,7 @@ func (agg *Aggregator) ClusterHealth() ClusterHealthResponse {
 		// Ensure svc is listed in resp.Services.
 		mtx.Lock()
 		if _, ok := resp.Services[svcName]; !ok {
-			resp.Services[svcName] = ServiceHealth{Health: "NONE"}
+			resp.Services[svcName] = ServiceHealth{Health: "MISSING"}
 		}
 		mtx.Unlock()
 
@@ -187,13 +187,18 @@ func (agg *Aggregator) ClusterHealth() ClusterHealthResponse {
 				mtx.Lock()
 				defer mtx.Unlock()
 				resp.Checks[fmt.Sprintf("%s+%s", svcName, pingURL)] = result
-				if result.Health == "OK" {
+				if result.Health == "OK" || result.Health == "SKIP" {
 					h := resp.Services[svcName]
 					h.N++
-					h.Health = "OK"
+					if result.Health == "OK" || h.N == 1 {
+						// "" => "SKIP" or "OK"
+						// "SKIP" => "OK"
+						h.Health = result.Health
+					}
 					resp.Services[svcName] = h
-				} else if result.Health != "SKIP" {
+				} else {
 					resp.Health = "ERROR"
+					resp.Errors = append(resp.Errors, fmt.Sprintf("%s: %s: %s", svcName, result.Health, result.Error))
 				}
 			}(svcName, addr)
 		}
@@ -214,6 +219,7 @@ func (agg *Aggregator) ClusterHealth() ClusterHealthResponse {
 		default:
 			if sh.Health != "OK" && sh.Health != "SKIP" {
 				resp.Health = "ERROR"
+				resp.Errors = append(resp.Errors, fmt.Sprintf("%s: %s: no InternalURLs configured", svcName, sh.Health))
 				continue
 			}
 		}
@@ -387,6 +393,7 @@ func (ccmd checkCommand) run(ctx context.Context, prog string, args []string, st
 	loader.SetupFlags(flags)
 	versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
 	timeout := flags.Duration("timeout", defaultTimeout.Duration(), "Maximum time to wait for health responses")
+	outputYAML := flags.Bool("yaml", false, "Output full health report in YAML format (default mode shows errors as plain text, is silent on success)")
 	if ok, _ := cmd.ParseFlags(flags, prog, args, "", stderr); !ok {
 		// cmd.ParseFlags already reported the error
 		return errSilent
@@ -408,13 +415,23 @@ func (ccmd checkCommand) run(ctx context.Context, prog string, args []string, st
 	ctx = ctxlog.Context(ctx, logger)
 	agg := Aggregator{Cluster: cluster, timeout: arvados.Duration(*timeout)}
 	resp := agg.ClusterHealth()
-	buf, err := yaml.Marshal(resp)
-	if err != nil {
-		return err
+	if *outputYAML {
+		y, err := yaml.Marshal(resp)
+		if err != nil {
+			return err
+		}
+		stdout.Write(y)
+		if resp.Health != "OK" {
+			return errSilent
+		}
+		return nil
 	}
-	stdout.Write(buf)
 	if resp.Health != "OK" {
-		return fmt.Errorf("health check failed")
+		for _, msg := range resp.Errors {
+			fmt.Fprintln(stdout, msg)
+		}
+		fmt.Fprintln(stderr, "health check failed")
+		return errSilent
 	}
 	return nil
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list