20953: Adds blackbox-exporter HTTP probes and SSL Monitor grafana dashboard.
[arvados.git] / tools / salt-install / config_examples / multi_host / aws / pillars / prometheus_server.sls
index 6dc90c840b84f0fe534d608f341dc8f2373c40ab..143097b4a454fc7a58c804fe6ed1b5d02e5f2580 100644 (file)
@@ -5,6 +5,7 @@
 
 {%- set controller_nodes = "__CONTROLLER_NODES__".split(',') %}
 {%- set enable_balancer = ("__ENABLE_BALANCER__"|to_bool) %}
+{%- set data_retention_time = "__PROMETHEUS_DATA_RETENTION_TIME__" %}
 
 ### PROMETHEUS
 prometheus:
@@ -13,10 +14,27 @@ prometheus:
       - prometheus
       - alertmanager
       - node_exporter
+      - blackbox_exporter
   pkg:
-    use_upstream_repo: true
+    use_upstream_repo: false
+    use_upstream_archive: true
     component:
+      blackbox_exporter:  # probe modules served by the blackbox exporter
+        config_file: /etc/prometheus/blackbox_exporter.yml
+        config:
+          modules:
+            http_2xx:  # selected by the http_probe scrape job via params.module
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2.0]  # must equal Go's Response.Proto; h2 reports "HTTP/2.0"
+                valid_status_codes: []  # empty list keeps the exporter default (any 2xx)
+                fail_if_ssl: false
+                fail_if_not_ssl: false
       prometheus:
+        service:
+          args:
+            storage.tsdb.retention.time: "{{ data_retention_time }}"  # quoted so the substituted value always stays a string
         config:
           global:
             scrape_interval: 15s
@@ -34,6 +52,43 @@ prometheus:
                   instance: mon.__CLUSTER__
                   cluster: __CLUSTER__
 
+            - job_name: http_probe  # HTTPS endpoint probes, run through the blackbox exporter
+              metrics_path: /probe
+              params:
+                module: [http_2xx]  # module defined under blackbox_exporter.config.modules
+              static_configs:
+                - targets: ['https://__DOMAIN__']
+                  labels:
+                    instance: controller.__CLUSTER__
+                - targets: ['https://workbench.__DOMAIN__']
+                  labels:
+                    instance: workbench.__CLUSTER__
+                - targets: ['https://workbench2.__DOMAIN__']
+                  labels:
+                    instance: workbench2.__CLUSTER__
+                - targets: ['https://download.__DOMAIN__']
+                  labels:
+                    instance: download.__CLUSTER__
+                - targets: ['https://grafana.__DOMAIN__']
+                  labels:
+                    instance: grafana.__CLUSTER__
+                - targets: ['https://prometheus.__DOMAIN__']
+                  labels:
+                    instance: prometheus.__CLUSTER__
+                - targets: ['https://webshell.__DOMAIN__']
+                  labels:
+                    instance: webshell.__CLUSTER__
+                - targets: ['https://ws.__DOMAIN__']
+                  labels:
+                    instance: ws.__CLUSTER__
+              relabel_configs:
+                - source_labels: [__address__]
+                  target_label: __param_target  # the listed URL becomes the exporter's ?target= parameter
+                - source_labels: [__param_target]
+                  target_label: instance  # NOTE(review): replaces the static instance labels above with the probed URL - confirm intended
+                - target_label: __address__
+                  replacement: 127.0.0.1:9115  # scrape the blackbox exporter on loopback instead of the URL itself
+
             ## Arvados unique jobs
             - job_name: arvados_ws
               bearer_token: __MANAGEMENT_TOKEN__
@@ -94,6 +149,7 @@ prometheus:
                     instance: arvados-dispatch-cloud.__CLUSTER__
                     cluster: __CLUSTER__
 
+            {%- if "__DATABASE_INT_IP__" != "" %}
             # Database
             - job_name: postgresql
               static_configs:
@@ -104,6 +160,7 @@ prometheus:
                   labels:
                     instance: database.__CLUSTER__
                     cluster: __CLUSTER__
+            {%- endif %}
 
             # Nodes
             {%- set node_list = "__NODELIST__".split(',') %}