From 9e988394278b9c0c072c27107b67669875b8fca7 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Wed, 16 Nov 2022 11:28:06 -0500 Subject: [PATCH] 19364: Document arvados-server check command. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- doc/admin/health-checks.html.textile.liquid | 37 ++++++++++++++++++++- sdk/go/health/aggregator.go | 2 +- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/doc/admin/health-checks.html.textile.liquid b/doc/admin/health-checks.html.textile.liquid index 7c87826964..fa273cd204 100644 --- a/doc/admin/health-checks.html.textile.liquid +++ b/doc/admin/health-checks.html.textile.liquid @@ -29,8 +29,43 @@ Health check endpoints return a JSON object with the field @health@. This has a } -h2. Healthcheck aggregator +h2. Health check aggregator The service @arvados-health@ performs health checks on all configured services and returns a single value of @OK@ or @ERROR@ for the entire cluster. It exposes the endpoint @/_health/all@ . The healthcheck aggregator uses the @Services@ section of the cluster-wide @config.yml@ configuration file. + +h2. Health check command + +The @arvados-server check@ command is another way to perform the same health checks as the health check aggregator service. It does not depend on the aggregator service. + +If all checks pass, it writes @health check OK@ to stderr (unless the @-quiet@ flag is used) and exits 0. Otherwise, it writes error messages to stderr and exits with error status. + +@arvados-server check -yaml@ outputs a YAML document on stdout with additional details about each service endpoint that was checked. + +{% codeblock as yaml %} +Checks: + "arvados-api-server+http://localhost:8004/_health/ping": + ClockTime: "2022-11-16T16:08:57Z" + ConfigSourceSHA256: e2c086ae3dd290cf029cb3fe79146529622279b6280cf6cd17dc8d8c30daa57f + ConfigSourceTimestamp: "2022-11-07T18:08:24.539545Z" + HTTPStatusCode: 200 + Health: OK + Response: + health: OK + ResponseTime: 0.017159 + Server: nginx/1.14.0 + Phusion Passenger(R) 6.0.15 + Version: 2.5.0~dev20221116141533 + "arvados-controller+http://localhost:8003/_health/ping": + ClockTime: "2022-11-16T16:08:57Z" + ConfigSourceSHA256: e2c086ae3dd290cf029cb3fe79146529622279b6280cf6cd17dc8d8c30daa57f + ConfigSourceTimestamp: "2022-11-07T18:08:24.539545Z" + HTTPStatusCode: 200 + Health: OK + Response: + health: OK + ResponseTime: 0.004748 + Server: "" + Version: 2.5.0~dev20221116141533 (go1.18.8) +# ... +{% endcodeblock %} diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go index 6fb33dc608..3bf37b1294 100644 --- a/sdk/go/health/aggregator.go +++ b/sdk/go/health/aggregator.go @@ -455,7 +455,7 @@ func (ccmd checkCommand) run(ctx context.Context, prog string, args []string, st versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0") timeout := flags.Duration("timeout", defaultTimeout.Duration(), "Maximum time to wait for health responses") quiet := flags.Bool("quiet", false, "Silent on success (suppress 'health check OK' message on stderr)") - outputYAML := flags.Bool("yaml", false, "Output full health report in YAML format (default mode shows errors as plain text, is silent on success)") + outputYAML := flags.Bool("yaml", false, "Output full health report in YAML format (default mode prints 'health check OK' or plain text errors)") if ok, _ := cmd.ParseFlags(flags, prog, args, "", stderr); !ok { // cmd.ParseFlags already reported the error return errSilent -- 2.30.2