[arvados] created: 2.1.0-3070-g273d4dda7

git repository hosting git at public.arvados.org
Mon Nov 21 15:27:25 UTC 2022


        at  273d4dda75bad4b1ba18bc3616f16082b95c0467 (commit)


commit 273d4dda75bad4b1ba18bc3616f16082b95c0467
Author: Tom Clegg <tom at curii.com>
Date:   Mon Nov 21 10:26:34 2022 -0500

    19364: Cancel container request when timing out.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go
index 3e3ac8675..ed963e1ef 100644
--- a/lib/diagnostics/cmd.go
+++ b/lib/diagnostics/cmd.go
@@ -703,12 +703,11 @@ func (diag *diagnoser) runtests() {
 
 		timeout := 10 * time.Minute
 		diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout))
-		ctx, cancel = context.WithDeadline(context.Background(), time.Now().Add(timeout))
-		defer cancel()
+		deadline := time.Now().Add(timeout)
 
 		var c arvados.Container
-		for ; cr.State != arvados.ContainerRequestStateFinal; time.Sleep(2 * time.Second) {
-			ctx, cancel := context.WithDeadline(ctx, time.Now().Add(diag.timeout))
+		for ; cr.State != arvados.ContainerRequestStateFinal && time.Now().Before(deadline); time.Sleep(2 * time.Second) {
+			ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
 			defer cancel()
 
 			crStateWas := cr.State
@@ -728,11 +727,26 @@ func (diag *diagnoser) runtests() {
 			if c.State != cStateWas {
 				diag.debugf("container state = %s", c.State)
 			}
+
+			cancel()
 		}
 
+		if cr.State != arvados.ContainerRequestStateFinal {
+			err := client.RequestAndDecodeContext(context.Background(), &cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{
+				"container_request": map[string]interface{}{
+					"priority": 0,
+				}})
+			if err != nil {
+				diag.infof("error canceling container request %s: %s", cr.UUID, err)
+			} else {
+				diag.debugf("canceled container request %s", cr.UUID)
+			}
+			return fmt.Errorf("timed out waiting for container to finish; container request %s state was %q, container %s state was %q", cr.UUID, cr.State, c.UUID, c.State)
+		}
 		if c.State != arvados.ContainerStateComplete {
 			return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State)
-		} else if c.ExitCode != 0 {
+		}
+		if c.ExitCode != 0 {
 			return fmt.Errorf("container exited %d", c.ExitCode)
 		}
 		return nil

commit 37180569bd74f4cbe1797898436634a5f686d832
Author: Tom Clegg <tom at curii.com>
Date:   Mon Nov 21 09:54:42 2022 -0500

    19364: Proofreading.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/admin/diagnostics.html.textile.liquid b/doc/admin/diagnostics.html.textile.liquid
index f4157a511..ec6a9bf9d 100644
--- a/doc/admin/diagnostics.html.textile.liquid
+++ b/doc/admin/diagnostics.html.textile.liquid
@@ -24,7 +24,7 @@ When run this way, diagnostics will also include "health checks":health-checks.h
 
 h2. Using regular user privileges
 
-On any node (server node, shell node, or a workstation outside the system network), you can also run diagnostics using by setting the usual @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables. Typically this is done with a regular user account.
+On any node (server node, shell node, or a workstation outside the system network), you can also run diagnostics by setting the usual @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables. Typically this is done with a regular user account.
 
 <notextile><pre>
 $ <span class="userinput">export ARVADOS_API_HOST=zzzzz.arvadosapi.com</span>

commit 531fd553a1b83c546066c1d2a2619f86e17b6d20
Merge: 75d0bce4f d760c01e2
Author: Tom Clegg <tom at curii.com>
Date:   Fri Nov 18 10:21:19 2022 -0500

    19364: Merge branch 'main' into 19364-diag-docs
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>


commit 75d0bce4f378efc488b67b178ace50301f9ad8ff
Author: Tom Clegg <tom at curii.com>
Date:   Thu Nov 17 16:33:56 2022 -0500

    19364: Add admin>diagnostics page.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/_config.yml b/doc/_config.yml
index 35ec48388..5c8d77382 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -184,6 +184,7 @@ navbar:
       - admin/logging.html.textile.liquid
       - admin/metrics.html.textile.liquid
       - admin/health-checks.html.textile.liquid
+      - admin/diagnostics.html.textile.liquid
       - admin/management-token.html.textile.liquid
       - admin/user-activity.html.textile.liquid
     - Data Management:
diff --git a/doc/admin/diagnostics.html.textile.liquid b/doc/admin/diagnostics.html.textile.liquid
new file mode 100644
index 000000000..f4157a511
--- /dev/null
+++ b/doc/admin/diagnostics.html.textile.liquid
@@ -0,0 +1,83 @@
+---
+layout: default
+navsection: admin
+title: Diagnostics
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+The @arvados-client diagnostics@ command exercises basic cluster functionality, and identifies some common installation and configuration problems. Especially after upgrading or reconfiguring Arvados or server/network infrastructure, it can be the quickest way to identify problems.
+
+h2. Using system privileges
+
+On a server node, it is easiest to run the diagnostics command with system privileges. The word @sudo@ here instructs the @arvados-client@ command to load @Controller.ExternalURL@ and @SystemRootToken@ from @/etc/arvados/config.yml@ and use those credentials to run tests with system privileges.
+
+When run this way, diagnostics will also include "health checks":health-checks.html.
+
+<notextile><pre>
+# <span class="userinput">arvados-client sudo diagnostics</span>
+</pre></notextile>
+
+h2. Using regular user privileges
+
+On any node (server node, shell node, or a workstation outside the system network), you can also run diagnostics using by setting the usual @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables. Typically this is done with a regular user account.
+
+<notextile><pre>
+$ <span class="userinput">export ARVADOS_API_HOST=zzzzz.arvadosapi.com</span>
+$ <span class="userinput">export ARVADOS_API_TOKEN=xxxxxxxxxx</span>
+$ <span class="userinput">arvados-client diagnostics</span>
+</pre></notextile>
+
+h2. Internal/external client detection
+
+The diagnostics output indicates whether its client connection is categorized by the server as internal or external. If you run diagnostics automatically with cron or a monitoring tool, you can use the @-internal-client@ or @-external-client@ flag to specify how you _expect_ the client to be categorized, and the test will fail otherwise. Example:
+
+<notextile><pre>
+# <span class="userinput">arvados-client sudo diagnostics -internal-client</span>
+[...]
+
+--- cut here --- error summary ---
+
+ERROR     60: checking internal/external client detection (11 ms): expecting internal=true external=false, but found internal=false external=true
+</pre></notextile>
+
+h2. Example output
+
+<notextile><pre>
+# <span class="userinput">arvados-client sudo diagnostics</span>
+INFO       5: running health check (same as `arvados-server check`)
+INFO      10: getting discovery document from https://zzzzz.arvadosapi.com/discovery/v1/apis/arvados/v1/rest
+INFO      20: getting exported config from https://zzzzz.arvadosapi.com/arvados/v1/config
+INFO      30: getting current user record
+INFO      40: connecting to service endpoint https://keep.zzzzz.arvadosapi.com/
+INFO      41: connecting to service endpoint https://*.collections.zzzzz.arvadosapi.com/
+INFO      42: connecting to service endpoint https://download.zzzzz.arvadosapi.com/
+INFO      43: connecting to service endpoint wss://ws.zzzzz.arvadosapi.com/websocket
+INFO      44: connecting to service endpoint https://workbench.zzzzz.arvadosapi.com/
+INFO      45: connecting to service endpoint https://workbench2.zzzzz.arvadosapi.com/
+INFO      50: checking CORS headers at https://zzzzz.arvadosapi.com/
+INFO      51: checking CORS headers at https://keep.zzzzz.arvadosapi.com/d41d8cd98f00b204e9800998ecf8427e+0
+INFO      52: checking CORS headers at https://download.zzzzz.arvadosapi.com/
+INFO      60: checking internal/external client detection
+INFO      61: reading+writing via keep service at https://keep.zzzzz.arvadosapi.com:443/
+INFO      80: finding/creating "scratch area for diagnostics" project
+INFO      90: creating temporary collection
+INFO     100: uploading file via webdav
+INFO     110: checking WebDAV ExternalURL wildcard (https://*.collections.zzzzz.arvadosapi.com/)
+INFO     120: downloading from webdav (https://d41d8cd98f00b204e9800998ecf8427e-0.collections.zzzzz.arvadosapi.com/foo)
+INFO     121: downloading from webdav (https://d41d8cd98f00b204e9800998ecf8427e-0.collections.zzzzz.arvadosapi.com/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     122: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo)
+INFO     123: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=d41d8cd98f00b204e9800998ecf8427e+0/_/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     124: downloading from webdav (https://a15a27cbc1c7d2d4a0d9e02529aaec7e-128.collections.zzzzz.arvadosapi.com/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     125: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=zzzzz-4zz18-twitqma8mbvwydy/_/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     130: getting list of virtual machines
+INFO     140: getting workbench1 webshell page
+INFO     150: connecting to webshell service
+INFO     160: running a container
+INFO      ... container request submitted, waiting up to 10m for container to run
+INFO    9990: deleting temporary collection
+</pre></notextile>
diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go
index 9c229c9b4..3e3ac8675 100644
--- a/lib/diagnostics/cmd.go
+++ b/lib/diagnostics/cmd.go
@@ -318,9 +318,9 @@ func (diag *diagnoser) runtests() {
 		isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0
 		isExternal := found["proxy"] > 0 && found["proxy"] == len(keeplist.Items)
 		if isExternal {
-			diag.verbosef("controller returned only proxy services, this host is treated as \"external\"")
+			diag.infof("controller returned only proxy services, this host is treated as \"external\"")
 		} else if isInternal {
-			diag.verbosef("controller returned only non-proxy services, this host is treated as \"internal\"")
+			diag.infof("controller returned only non-proxy services, this host is treated as \"internal\"")
 		}
 		if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) {
 			return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal)

commit 9e988394278b9c0c072c27107b67669875b8fca7
Author: Tom Clegg <tom at curii.com>
Date:   Wed Nov 16 11:28:06 2022 -0500

    19364: Document arvados-server check command.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/admin/health-checks.html.textile.liquid b/doc/admin/health-checks.html.textile.liquid
index 7c8782696..fa273cd20 100644
--- a/doc/admin/health-checks.html.textile.liquid
+++ b/doc/admin/health-checks.html.textile.liquid
@@ -29,8 +29,43 @@ Health check endpoints return a JSON object with the field @health@.  This has a
 }
 </pre>
 
-h2. Healthcheck aggregator
+h2. Health check aggregator
 
 The service @arvados-health@ performs health checks on all configured services and returns a single value of @OK@ or @ERROR@ for the entire cluster.  It exposes the endpoint @/_health/all@ .
 
 The healthcheck aggregator uses the @Services@ section of the cluster-wide @config.yml@ configuration file.
+
+h2. Health check command
+
+The @arvados-server check@ command is another way to perform the same health checks as the health check aggregator service. It does not depend on the aggregator service.
+
+If all checks pass, it writes @health check OK@ to stderr (unless the @-quiet@ flag is used) and exits 0. Otherwise, it writes error messages to stderr and exits with error status.
+
+@arvados-server check -yaml@ outputs a YAML document on stdout with additional details about each service endpoint that was checked.
+
+{% codeblock as yaml %}
+Checks:
+  "arvados-api-server+http://localhost:8004/_health/ping":
+    ClockTime: "2022-11-16T16:08:57Z"
+    ConfigSourceSHA256: e2c086ae3dd290cf029cb3fe79146529622279b6280cf6cd17dc8d8c30daa57f
+    ConfigSourceTimestamp: "2022-11-07T18:08:24.539545Z"
+    HTTPStatusCode: 200
+    Health: OK
+    Response:
+      health: OK
+    ResponseTime: 0.017159
+    Server: nginx/1.14.0 + Phusion Passenger(R) 6.0.15
+    Version: 2.5.0~dev20221116141533
+  "arvados-controller+http://localhost:8003/_health/ping":
+    ClockTime: "2022-11-16T16:08:57Z"
+    ConfigSourceSHA256: e2c086ae3dd290cf029cb3fe79146529622279b6280cf6cd17dc8d8c30daa57f
+    ConfigSourceTimestamp: "2022-11-07T18:08:24.539545Z"
+    HTTPStatusCode: 200
+    Health: OK
+    Response:
+      health: OK
+    ResponseTime: 0.004748
+    Server: ""
+    Version: 2.5.0~dev20221116141533 (go1.18.8)
+# ...
+{% endcodeblock %}
diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go
index 6fb33dc60..3bf37b129 100644
--- a/sdk/go/health/aggregator.go
+++ b/sdk/go/health/aggregator.go
@@ -455,7 +455,7 @@ func (ccmd checkCommand) run(ctx context.Context, prog string, args []string, st
 	versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
 	timeout := flags.Duration("timeout", defaultTimeout.Duration(), "Maximum time to wait for health responses")
 	quiet := flags.Bool("quiet", false, "Silent on success (suppress 'health check OK' message on stderr)")
-	outputYAML := flags.Bool("yaml", false, "Output full health report in YAML format (default mode shows errors as plain text, is silent on success)")
+	outputYAML := flags.Bool("yaml", false, "Output full health report in YAML format (default mode prints 'health check OK' or plain text errors)")
 	if ok, _ := cmd.ParseFlags(flags, prog, args, "", stderr); !ok {
 		// cmd.ParseFlags already reported the error
 		return errSilent

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list