From 75d0bce4f378efc488b67b178ace50301f9ad8ff Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 17 Nov 2022 16:33:56 -0500 Subject: [PATCH] 19364: Add admin>diagnostics page. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- doc/_config.yml | 1 + doc/admin/diagnostics.html.textile.liquid | 83 +++++++++++++++++++++++ lib/diagnostics/cmd.go | 4 +- 3 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 doc/admin/diagnostics.html.textile.liquid diff --git a/doc/_config.yml b/doc/_config.yml index 35ec483887..5c8d77382e 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -184,6 +184,7 @@ navbar: - admin/logging.html.textile.liquid - admin/metrics.html.textile.liquid - admin/health-checks.html.textile.liquid + - admin/diagnostics.html.textile.liquid - admin/management-token.html.textile.liquid - admin/user-activity.html.textile.liquid - Data Management: diff --git a/doc/admin/diagnostics.html.textile.liquid b/doc/admin/diagnostics.html.textile.liquid new file mode 100644 index 0000000000..f4157a5116 --- /dev/null +++ b/doc/admin/diagnostics.html.textile.liquid @@ -0,0 +1,83 @@ +--- +layout: default +navsection: admin +title: Diagnostics +... + +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +The @arvados-client diagnostics@ command exercises basic cluster functionality, and identifies some common installation and configuration problems. Especially after upgrading or reconfiguring Arvados or server/network infrastructure, it can be the quickest way to identify problems. + +h2. Using system privileges + +On a server node, it is easiest to run the diagnostics command with system privileges. The word @sudo@ here instructs the @arvados-client@ command to load @Controller.ExternalURL@ and @SystemRootToken@ from @/etc/arvados/config.yml@ and use those credentials to run tests with system privileges. 
+ +When run this way, diagnostics will also include "health checks":health-checks.html. + +
+# arvados-client sudo diagnostics
+
+ +h2. Using regular user privileges + +On any node (server node, shell node, or a workstation outside the system network), you can also run diagnostics by setting the usual @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables. Typically this is done with a regular user account. + +
+$ export ARVADOS_API_HOST=zzzzz.arvadosapi.com
+$ export ARVADOS_API_TOKEN=xxxxxxxxxx
+$ arvados-client diagnostics
+
+ +h2. Internal/external client detection + +The diagnostics output indicates whether its client connection is categorized by the server as internal or external. If you run diagnostics automatically with cron or a monitoring tool, you can use the @-internal-client@ or @-external-client@ flag to specify how you _expect_ the client to be categorized; the test will fail if the server categorizes the client differently. Example: + +
+# arvados-client sudo diagnostics -internal-client
+[...]
+
+--- cut here --- error summary ---
+
+ERROR     60: checking internal/external client detection (11 ms): expecting internal=true external=false, but found internal=false external=true
+
+ +h2. Example output + +
+# arvados-client sudo diagnostics
+INFO       5: running health check (same as `arvados-server check`)
+INFO      10: getting discovery document from https://zzzzz.arvadosapi.com/discovery/v1/apis/arvados/v1/rest
+INFO      20: getting exported config from https://zzzzz.arvadosapi.com/arvados/v1/config
+INFO      30: getting current user record
+INFO      40: connecting to service endpoint https://keep.zzzzz.arvadosapi.com/
+INFO      41: connecting to service endpoint https://*.collections.zzzzz.arvadosapi.com/
+INFO      42: connecting to service endpoint https://download.zzzzz.arvadosapi.com/
+INFO      43: connecting to service endpoint wss://ws.zzzzz.arvadosapi.com/websocket
+INFO      44: connecting to service endpoint https://workbench.zzzzz.arvadosapi.com/
+INFO      45: connecting to service endpoint https://workbench2.zzzzz.arvadosapi.com/
+INFO      50: checking CORS headers at https://zzzzz.arvadosapi.com/
+INFO      51: checking CORS headers at https://keep.zzzzz.arvadosapi.com/d41d8cd98f00b204e9800998ecf8427e+0
+INFO      52: checking CORS headers at https://download.zzzzz.arvadosapi.com/
+INFO      60: checking internal/external client detection
+INFO      61: reading+writing via keep service at https://keep.zzzzz.arvadosapi.com:443/
+INFO      80: finding/creating "scratch area for diagnostics" project
+INFO      90: creating temporary collection
+INFO     100: uploading file via webdav
+INFO     110: checking WebDAV ExternalURL wildcard (https://*.collections.zzzzz.arvadosapi.com/)
+INFO     120: downloading from webdav (https://d41d8cd98f00b204e9800998ecf8427e-0.collections.zzzzz.arvadosapi.com/foo)
+INFO     121: downloading from webdav (https://d41d8cd98f00b204e9800998ecf8427e-0.collections.zzzzz.arvadosapi.com/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     122: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo)
+INFO     123: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=d41d8cd98f00b204e9800998ecf8427e+0/_/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     124: downloading from webdav (https://a15a27cbc1c7d2d4a0d9e02529aaec7e-128.collections.zzzzz.arvadosapi.com/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     125: downloading from webdav (https://download.zzzzz.arvadosapi.com/c=zzzzz-4zz18-twitqma8mbvwydy/_/sha256:feb5d9fea6a5e9606aa995e879d862b825965ba48de054caab5ef356dc6b3412.tar)
+INFO     130: getting list of virtual machines
+INFO     140: getting workbench1 webshell page
+INFO     150: connecting to webshell service
+INFO     160: running a container
+INFO      ... container request submitted, waiting up to 10m for container to run
+INFO    9990: deleting temporary collection
+
diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go index 9c229c9b4e..3e3ac86757 100644 --- a/lib/diagnostics/cmd.go +++ b/lib/diagnostics/cmd.go @@ -318,9 +318,9 @@ func (diag *diagnoser) runtests() { isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0 isExternal := found["proxy"] > 0 && found["proxy"] == len(keeplist.Items) if isExternal { - diag.verbosef("controller returned only proxy services, this host is treated as \"external\"") + diag.infof("controller returned only proxy services, this host is treated as \"external\"") } else if isInternal { - diag.verbosef("controller returned only non-proxy services, this host is treated as \"internal\"") + diag.infof("controller returned only non-proxy services, this host is treated as \"internal\"") } if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) { return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal) -- 2.30.2