20953: Properly pass auth data to HTTPS endpoints that need it.
author: Lucas Di Pentima <lucas.dipentima@curii.com>
Wed, 25 Oct 2023 19:24:01 +0000 (16:24 -0300)
committer: Lucas Di Pentima <lucas.dipentima@curii.com>
Wed, 25 Oct 2023 20:20:46 +0000 (17:20 -0300)
This makes all HTTP responses green (200) on the grafana dashboard.

Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

tools/salt-install/config_examples/multi_host/aws/pillars/prometheus_server.sls

index 143097b4a454fc7a58c804fe6ed1b5d02e5f2580..e6714ae8a7adafdc9926f400207a7435b5014500 100644 (file)
@@ -28,9 +28,38 @@ prometheus:
               timeout: 5s
               http:
                 valid_http_versions: [HTTP/1.1, HTTP/2]
-                valid_status_codes: []  # Default is [200]
+                valid_status_codes: [200]
+                method: GET
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
                 fail_if_ssl: false
-                fail_if_not_ssl: false
+                fail_if_not_ssl: true
+            http_2xx_mngmt_token:  # HTTPS probe that sends the management token as a bearer token
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2]
+                valid_status_codes: [200]  # Only a 200 response counts as success
+                method: GET
+                bearer_token: "__MANAGEMENT_TOKEN__"  # Quoted like the basic_auth placeholders so the value put in its place is always read as a string
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
+                fail_if_ssl: false
+                fail_if_not_ssl: true  # Plain-HTTP responses fail the probe
+            http_2xx_basic_auth:  # HTTPS probe that authenticates with HTTP basic auth
+              prober: http
+              timeout: 5s
+              http:
+                valid_http_versions: [HTTP/1.1, HTTP/2]
+                valid_status_codes: [200]  # Only a 200 response counts as success
+                method: GET
+                basic_auth:  # Placeholder credentials; presumably filled in at install time
+                  username: "__MONITORING_USERNAME__"
+                  password: "__MONITORING_PASSWORD__"
+                tls_config:
+                  insecure_skip_verify: true # Avoid failures on self-signed certs
+                fail_if_ssl: false
+                fail_if_not_ssl: true  # Plain-HTTP responses fail the probe
       prometheus:
         service:
            args:
@@ -57,30 +86,56 @@ prometheus:
               params:
                 module: [http_2xx]
               static_configs:
-                - targets: ['https://__DOMAIN__']
-                  labels:
-                    instance: controller.__CLUSTER__
                 - targets: ['https://workbench.__DOMAIN__']
                   labels:
                     instance: workbench.__CLUSTER__
                 - targets: ['https://workbench2.__DOMAIN__']
                   labels:
                     instance: workbench2.__CLUSTER__
-                - targets: ['https://download.__DOMAIN__']
+                - targets: ['https://webshell.__DOMAIN__']
+                  labels:
+                    instance: webshell.__CLUSTER__
+              relabel_configs:
+                - source_labels: [__address__]
+                  target_label: __param_target
+                - source_labels: [__param_target]
+                  target_label: instance
+                - target_label: __address__
+                  replacement: 127.0.0.1:9115          # blackbox exporter.
+
+            - job_name: http_probe_mngmt_token  # Probes the /_health/ping endpoints listed below
+              metrics_path: /probe
+              params:
+                module: [http_2xx_mngmt_token]  # Blackbox module that adds the management bearer token
+              static_configs:
+                - targets: ['https://__DOMAIN__/_health/ping']
+                  labels:
+                    instance: controller.__CLUSTER__
+                - targets: ['https://download.__DOMAIN__/_health/ping']
                   labels:
                     instance: download.__CLUSTER__
+                - targets: ['https://ws.__DOMAIN__/_health/ping']
+                  labels:
+                    instance: ws.__CLUSTER__
+              relabel_configs:
+                - source_labels: [__address__]  # Pass the listed URL on as the probe's `target` parameter
+                  target_label: __param_target
+                - source_labels: [__param_target]  # NOTE(review): writes the same `instance` label set in static_configs above; confirm which value should win
+                  target_label: instance
+                - target_label: __address__  # Send the scrape itself to the local exporter instead of the target URL
+                  replacement: 127.0.0.1:9115          # blackbox exporter.
+
+            - job_name: http_probe_basic_auth
+              metrics_path: /probe
+              params:
+                module: [http_2xx_basic_auth]
+              static_configs:
                 - targets: ['https://grafana.__DOMAIN__']
                   labels:
                     instance: grafana.__CLUSTER__
                 - targets: ['https://prometheus.__DOMAIN__']
                   labels:
                     instance: prometheus.__CLUSTER__
-                - targets: ['https://webshell.__DOMAIN__']
-                  labels:
-                    instance: webshell.__CLUSTER__
-                - targets: ['https://ws.__DOMAIN__']
-                  labels:
-                    instance: ws.__CLUSTER__
               relabel_configs:
                 - source_labels: [__address__]
                   target_label: __param_target