From: Peter Amstutz Date: Mon, 23 Jul 2018 13:31:51 +0000 (-0400) Subject: 13791: Health check endpoint docs wip X-Git-Tag: 1.2.0~55^2~3 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/a77e606772cd4909f2401f55cd7c3c08f8325fb8 13791: Health check endpoint docs wip Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/doc/_config.yml b/doc/_config.yml index 075111d921..3cf6fb377a 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -147,15 +147,21 @@ navbar: admin: - Topics: - admin/index.html.textile.liquid + - Upgrading and migrations: - admin/upgrading.html.textile.liquid + - install/migrate-docker19.html.textile.liquid + - Users and Groups: - install/cheat_sheet.html.textile.liquid - - user/topics/arvados-sync-groups.html.textile.liquid - - admin/storage-classes.html.textile.liquid - admin/activation.html.textile.liquid - - admin/migrating-providers.html.textile.liquid - admin/merge-remote-account.html.textile.liquid + - admin/migrating-providers.html.textile.liquid + - user/topics/arvados-sync-groups.html.textile.liquid + - Monitoring: + - admin/health-checks.html.textile.liquid + - admin/metrics.html.textile.liquid + - Cloud: + - admin/storage-classes.html.textile.liquid - admin/spot-instances.html.textile.liquid - - install/migrate-docker19.html.textile.liquid installguide: - Overview: - install/index.html.textile.liquid diff --git a/doc/admin/health-checks.html.textile.liquid b/doc/admin/health-checks.html.textile.liquid new file mode 100644 index 0000000000..64ce5ee493 --- /dev/null +++ b/doc/admin/health-checks.html.textile.liquid @@ -0,0 +1,91 @@ +--- +layout: default +navsection: admin +title: Health checks +... + +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +Arvados services support endpoints for monitoring the status of a cluster. + +Health check endpoints are found at @/_health/ping@ for many Arvados services. 
+ +Services must have ManagementToken configured. This is used to authorize access to the health check endpoint. If ManagementToken is not configured, health checks will return the error @404 disabled@. + +The requester must provide the HTTP header @Authorization: Bearer (ManagementToken)@. + +This endpoint returns a JSON object with the field @health@. This has a value of either @OK@ or @ERROR@. On error, it may also include a field @error@ with additional information. + +h2. How to enable health checks on each service. + +h3. API server + +Set @ManagementToken@ in @application.yml@ + +
+  # Token to be included in all healthcheck requests. Disabled by default.
+  # Server expects request header of the format "Authorization: Bearer xxx"
+  ManagementToken: ...
+
+ +h3. Node Manager + +Set @port@ (the listen port) and @ManagementToken@ in the @Manage@ section of @node-manager.ini@ . + +
+[Manage]
+port=8888
+ManagementToken=...
+
+ + +* +* keepstore +* keep-web +* keepproxy +* arv-git-httpd +* websockets + +h2. Healthcheck aggregator + +The service @arvados-health@ performs health checks on all configured services and returns a single value of @OK@ or @ERROR@ for the entire cluster. It exposes the endpoint @/_health/all@ . + +The healthcheck aggregator uses the "NodeProfile" section of the cluster-wide configuration file. Here is an example. + +
+Cluster:
+  # The cluster uuid prefix
+  zzzzz:
+    NodeProfile:
+      # For each node, the profile name corresponds to a
+      # locally-resolvable hostname, and describes which Arvados
+      # services are available on that machine.
+      api:
+        arvados-controller:
+          Listen: 8000
+        arvados-api-server:
+          Listen: 8001
+      manage:
+        arvados-node-manager:
+          Listen: 8002
+      workbench:
+        arvados-workbench:
+          Listen: 8003
+        arvados-ws:
+          Listen: 8004
+      keep:
+        keep-web:
+          Listen: 8005
+        keepproxy:
+          Listen: 8006
+      keep0:
+        keepstore:
+          Listen: 25701
+      keep1:
+        keepstore:
+          Listen: 25701
+
diff --git a/doc/admin/metrics.html.textile.liquid b/doc/admin/metrics.html.textile.liquid new file mode 100644 index 0000000000..fb33ccbd9e --- /dev/null +++ b/doc/admin/metrics.html.textile.liquid @@ -0,0 +1,13 @@ +--- +layout: default +navsection: admin +title: Metrics +... + +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +Arvados services support endpoints for monitoring the performance of a cluster.