Merge branch '21640-max-nofile'
[arvados.git] / tools / salt-install / config_examples / multi_host / aws / pillars / prometheus_server.sls
index 26877f35df13166769d9ee8ac841cb0d82097ac7..e6714ae8a7adafdc9926f400207a7435b5014500 100644 (file)
@@ -14,10 +14,52 @@ prometheus:
       - prometheus
       - alertmanager
       - node_exporter
+      - blackbox_exporter
   pkg:
     use_upstream_repo: false
     use_upstream_archive: true
     component:
+      # Blackbox exporter: defines the HTTP probe modules that the scrape jobs
+      # select by name through their `module` parameter.
+      blackbox_exporter:
+        config_file: /etc/prometheus/blackbox_exporter.yml
+        config:
+          modules:
+            # Plain GET probe without credentials; succeeds only on a 200
+            # response served over TLS.
+            http_2xx:
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2]
+                valid_status_codes: [200]
+                method: GET
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
+                fail_if_ssl: false
+                fail_if_not_ssl: true
+            # Same checks as http_2xx, but the request carries a bearer token.
+            http_2xx_mngmt_token:
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2]
+                valid_status_codes: [200]
+                method: GET
+                # Quoted like the basic_auth credentials below so the value
+                # is always read as a string, whatever the placeholder is
+                # replaced with (presumably at install time).
+                bearer_token: "__MANAGEMENT_TOKEN__"
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
+                fail_if_ssl: false
+                fail_if_not_ssl: true
+            # Same checks as http_2xx, but the request carries HTTP basic
+            # auth credentials.
+            http_2xx_basic_auth:
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2]
+                valid_status_codes: [200]
+                method: GET
+                basic_auth:
+                  username: "__MONITORING_USERNAME__"
+                  password: "__MONITORING_PASSWORD__"
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
+                fail_if_ssl: false
+                fail_if_not_ssl: true
       prometheus:
         service:
            args:
@@ -39,6 +81,69 @@ prometheus:
                   instance: mon.__CLUSTER__
                   cluster: __CLUSTER__
 
+            # Probes the listed HTTPS URLs with the `http_2xx` blackbox module.
+            - job_name: http_probe
+              metrics_path: /probe
+              params:
+                module: [http_2xx]
+              static_configs:
+                - targets: ['https://workbench.__DOMAIN__']
+                  labels:
+                    instance: workbench.__CLUSTER__
+                - targets: ['https://workbench2.__DOMAIN__']
+                  labels:
+                    instance: workbench2.__CLUSTER__
+                - targets: ['https://webshell.__DOMAIN__']
+                  labels:
+                    instance: webshell.__CLUSTER__
+              relabel_configs:
+                # Hand the configured URL to the exporter as the `target`
+                # query parameter.
+                - source_labels: [__address__]
+                  target_label: __param_target
+                # NOTE(review): this rule appears to overwrite the static
+                # `instance` labels set above with the probed URL -- confirm
+                # which value is intended.
+                - source_labels: [__param_target]
+                  target_label: instance
+                # Send the scrape itself to the address below rather than to
+                # the probed URL.
+                - target_label: __address__
+                  replacement: 127.0.0.1:9115          # blackbox exporter.
+
+            # Probes the `/_health/ping` URLs listed below with the
+            # `http_2xx_mngmt_token` blackbox module.
+            - job_name: http_probe_mngmt_token
+              metrics_path: /probe
+              params:
+                module: [http_2xx_mngmt_token]
+              static_configs:
+                - targets: ['https://__DOMAIN__/_health/ping']
+                  labels:
+                    instance: controller.__CLUSTER__
+                - targets: ['https://download.__DOMAIN__/_health/ping']
+                  labels:
+                    instance: download.__CLUSTER__
+                - targets: ['https://ws.__DOMAIN__/_health/ping']
+                  labels:
+                    instance: ws.__CLUSTER__
+              relabel_configs:
+                # Hand the configured URL to the exporter as the `target`
+                # query parameter.
+                - source_labels: [__address__]
+                  target_label: __param_target
+                # NOTE(review): this rule appears to overwrite the static
+                # `instance` labels set above with the probed URL -- confirm
+                # which value is intended.
+                - source_labels: [__param_target]
+                  target_label: instance
+                # Send the scrape itself to the address below rather than to
+                # the probed URL.
+                - target_label: __address__
+                  replacement: 127.0.0.1:9115          # blackbox exporter.
+
+            # Probes the URLs listed below with the `http_2xx_basic_auth`
+            # blackbox module.
+            - job_name: http_probe_basic_auth
+              metrics_path: /probe
+              params:
+                module: [http_2xx_basic_auth]
+              static_configs:
+                - targets: ['https://grafana.__DOMAIN__']
+                  labels:
+                    instance: grafana.__CLUSTER__
+                - targets: ['https://prometheus.__DOMAIN__']
+                  labels:
+                    instance: prometheus.__CLUSTER__
+              relabel_configs:
+                # Hand the configured URL to the exporter as the `target`
+                # query parameter.
+                - source_labels: [__address__]
+                  target_label: __param_target
+                # NOTE(review): this rule appears to overwrite the static
+                # `instance` labels set above with the probed URL -- confirm
+                # which value is intended.
+                - source_labels: [__param_target]
+                  target_label: instance
+                # Send the scrape itself to the address below rather than to
+                # the probed URL.
+                - target_label: __address__
+                  replacement: 127.0.0.1:9115          # blackbox exporter.
+
             ## Arvados unique jobs
             - job_name: arvados_ws
               bearer_token: __MANAGEMENT_TOKEN__