13791: Health check endpoint docs wip
author Peter Amstutz <pamstutz@veritasgenetics.com>
Mon, 23 Jul 2018 13:31:51 +0000 (09:31 -0400)
committer Peter Amstutz <pamstutz@veritasgenetics.com>
Mon, 23 Jul 2018 13:34:22 +0000 (09:34 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

doc/_config.yml
doc/admin/health-checks.html.textile.liquid [new file with mode: 0644]
doc/admin/metrics.html.textile.liquid [new file with mode: 0644]

index 075111d921602bb1a959a2fedaa3bbc747ebb863..3cf6fb377a2d6618ce9db821b069c5d8bcfceffa 100644 (file)
@@ -147,15 +147,21 @@ navbar:
   admin:
     - Topics:
       - admin/index.html.textile.liquid
+    - Upgrading and migrations:
       - admin/upgrading.html.textile.liquid
+      - install/migrate-docker19.html.textile.liquid
+    - Users and Groups:
       - install/cheat_sheet.html.textile.liquid
-      - user/topics/arvados-sync-groups.html.textile.liquid
-      - admin/storage-classes.html.textile.liquid
       - admin/activation.html.textile.liquid
-      - admin/migrating-providers.html.textile.liquid
       - admin/merge-remote-account.html.textile.liquid
+      - admin/migrating-providers.html.textile.liquid
+      - user/topics/arvados-sync-groups.html.textile.liquid
+    - Monitoring:
+      - admin/health-checks.html.textile.liquid
+      - admin/metrics.html.textile.liquid
+    - Cloud:
+      - admin/storage-classes.html.textile.liquid
       - admin/spot-instances.html.textile.liquid
-      - install/migrate-docker19.html.textile.liquid
   installguide:
     - Overview:
       - install/index.html.textile.liquid
diff --git a/doc/admin/health-checks.html.textile.liquid b/doc/admin/health-checks.html.textile.liquid
new file mode 100644 (file)
index 0000000..64ce5ee
--- /dev/null
@@ -0,0 +1,91 @@
+---
+layout: default
+navsection: admin
+title: Health checks
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados services support endpoints for monitoring the status of a cluster.
+
+Health check endpoints are found at @/_health/ping@ for many Arvados services.
+
+Services must have ManagementToken configured.  This is used to authorize access to the health check endpoint.  If ManagementToken is not configured, health checks will return the error @404 disabled@.
+
+The requester must provide the HTTP header @Authorization: Bearer (ManagementToken)@.
+
+This endpoint returns a JSON object with the field @health@.  This has a value of either @OK@ or @ERROR@.  On error, it may also include a field @error@ with additional information.
+
+h2. How to enable health checks on each service
+
+h3. API server
+
+Set @ManagementToken@ in @application.yml@
+
+<pre>
+  # Token to be included in all healthcheck requests. Disabled by default.
+  # Server expects request header of the format "Authorization: Bearer xxx"
+  ManagementToken: ...
+</pre>
+
+h3. Node Manager
+
+Set @port@ (the listen port) and @ManagementToken@ in the @Manage@ section of @node-manager.ini@ .
+
+<pre>
+[Manage]
+port=8888
+ManagementToken=...
+</pre>
+
+
+*
+* keepstore
+* keep-web
+* keepproxy
+* arv-git-httpd
+* websockets
+
+h2. Healthcheck aggregator
+
+The service @arvados-health@ performs health checks on all configured services and returns a single value of @OK@ or @ERROR@ for the entire cluster.  It exposes the endpoint @/_health/all@ .
+
+The healthcheck aggregator uses the "NodeProfile" section of the cluster-wide configuration file.  Here is an example.
+
+<pre>
+Cluster:
+  # The cluster uuid prefix
+  zzzzz:
+    NodeProfile:
+      # For each node, the profile name corresponds to a
+      # locally-resolvable hostname, and describes which Arvados
+      # services are available on that machine.
+      api:
+        arvados-controller:
+          Listen: 8000
+        arvados-api-server:
+          Listen: 8001
+      manage:
+        arvados-node-manager:
+          Listen: 8002
+      workbench:
+        arvados-workbench:
+          Listen: 8003
+        arvados-ws:
+          Listen: 8004
+      keep:
+        keep-web:
+          Listen: 8005
+        keepproxy:
+          Listen: 8006
+      keep0:
+        keepstore:
+          Listen: 25701
+      keep1:
+        keepstore:
+          Listen: 25701
+</pre>
diff --git a/doc/admin/metrics.html.textile.liquid b/doc/admin/metrics.html.textile.liquid
new file mode 100644 (file)
index 0000000..fb33ccb
--- /dev/null
@@ -0,0 +1,13 @@
+---
+layout: default
+navsection: admin
+title: Metrics
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados services support endpoints for monitoring the performance of a cluster.