20953: Adds blackbox-exporter HTTP probes and SSL Monitor grafana dashboard.
[arvados.git] / tools / salt-install / config_examples / multi_host / aws / pillars / prometheus_server.sls
---
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Salt pillar for the monitoring host: selects the Prometheus components to
# install and provides the blackbox_exporter and Prometheus configuration.
#
# NOTE(review): the double-underscore names (__CLUSTER__, __DOMAIN__, ...)
# look like installer placeholders, presumably substituted before Salt
# renders this file -- confirm against the provisioning script.
{%- set controller_nodes = "__CONTROLLER_NODES__".split(',') %}
{%- set enable_balancer = ("__ENABLE_BALANCER__"|to_bool) %}
{%- set data_retention_time = "__PROMETHEUS_DATA_RETENTION_TIME__" %}

### PROMETHEUS
prometheus:
  wanted:
    component:
      - prometheus
      - alertmanager
      - node_exporter
      - blackbox_exporter
  pkg:
    use_upstream_repo: false
    use_upstream_archive: true
    component:
      blackbox_exporter:
        config_file: /etc/prometheus/blackbox_exporter.yml
        config:
          modules:
            # Module referenced by the `http_probe` scrape job below.
            http_2xx:
              prober: http
              timeout: 5s
              http:
                # The exporter compares these strings exactly against the
                # response protocol, which is "HTTP/2.0" (not "HTTP/2") for
                # HTTP/2 responses.
                valid_http_versions: ['HTTP/1.1', 'HTTP/2.0']
                valid_status_codes: []  # Empty list: accept any 2xx status
                fail_if_ssl: false
                fail_if_not_ssl: false
      prometheus:
        service:
          args:
            # Quoted so the substituted value is always read as a string.
            storage.tsdb.retention.time: "{{ data_retention_time }}"
        config:
          global:
            scrape_interval: 15s
            evaluation_interval: 15s
          rule_files:
            - rules.yml

          scrape_configs:
            - job_name: prometheus
              # metrics_path defaults to /metrics
              # scheme defaults to http.
              static_configs:
                - targets: ['localhost:9090']
                  labels:
                    instance: mon.__CLUSTER__
                    cluster: __CLUSTER__

            # HTTP(S) probes, executed by the local blackbox exporter.
            - job_name: http_probe
              metrics_path: /probe
              params:
                module: [http_2xx]
              # NOTE(review): the second relabel rule below copies the probed
              # URL into `instance`, replacing the static `instance` labels
              # set here -- confirm which value the dashboards expect.
              static_configs:
                - targets: ['https://__DOMAIN__']
                  labels:
                    instance: controller.__CLUSTER__
                - targets: ['https://workbench.__DOMAIN__']
                  labels:
                    instance: workbench.__CLUSTER__
                - targets: ['https://workbench2.__DOMAIN__']
                  labels:
                    instance: workbench2.__CLUSTER__
                - targets: ['https://download.__DOMAIN__']
                  labels:
                    instance: download.__CLUSTER__
                - targets: ['https://grafana.__DOMAIN__']
                  labels:
                    instance: grafana.__CLUSTER__
                - targets: ['https://prometheus.__DOMAIN__']
                  labels:
                    instance: prometheus.__CLUSTER__
                - targets: ['https://webshell.__DOMAIN__']
                  labels:
                    instance: webshell.__CLUSTER__
                - targets: ['https://ws.__DOMAIN__']
                  labels:
                    instance: ws.__CLUSTER__
              relabel_configs:
                # Hand the URL to the exporter as the `target` URL parameter.
                - source_labels: [__address__]
                  target_label: __param_target
                # Expose the probed URL as the `instance` label.
                - source_labels: [__param_target]
                  target_label: instance
                # Send the actual scrape request to the exporter itself.
                - target_label: __address__
                  replacement: 127.0.0.1:9115          # blackbox exporter.

            ## Arvados unique jobs
            - job_name: arvados_ws
              bearer_token: '__MANAGEMENT_TOKEN__'
              scheme: https
              static_configs:
                - targets: ['ws.__DOMAIN__:443']
                  labels:
                    instance: ws.__CLUSTER__
                    cluster: __CLUSTER__
            # With the balancer enabled, every controller node is scraped
            # directly over http; otherwise the single __DOMAIN__ endpoint is
            # scraped over https.
            - job_name: arvados_controller
              bearer_token: '__MANAGEMENT_TOKEN__'
              {%- if enable_balancer %}
              scheme: http
              {%- else %}
              scheme: https
              {%- endif %}
              static_configs:
                {%- if enable_balancer %}
                  {%- for controller in controller_nodes %}
                - targets: ['{{ controller }}']
                  labels:
                    instance: {{ controller.split('.')[0] }}.__CLUSTER__
                    cluster: __CLUSTER__
                  {%- endfor %}
                {%- else %}
                - targets: ['__DOMAIN__:443']
                  labels:
                    instance: controller.__CLUSTER__
                    cluster: __CLUSTER__
                {%- endif %}
            - job_name: keep_web
              bearer_token: '__MANAGEMENT_TOKEN__'
              scheme: https
              static_configs:
                - targets: ['keep.__DOMAIN__:443']
                  labels:
                    instance: keep-web.__CLUSTER__
                    cluster: __CLUSTER__
            - job_name: keep_balance
              bearer_token: '__MANAGEMENT_TOKEN__'
              static_configs:
                - targets: ['__KEEPBALANCE_INT_IP__:9005']
                  labels:
                    instance: keep-balance.__CLUSTER__
                    cluster: __CLUSTER__
            - job_name: keepstore
              bearer_token: '__MANAGEMENT_TOKEN__'
              static_configs:
                - targets: ['__KEEPSTORE0_INT_IP__:25107']
                  labels:
                    instance: keep0.__CLUSTER__
                    cluster: __CLUSTER__
            - job_name: arvados_dispatch_cloud
              bearer_token: '__MANAGEMENT_TOKEN__'
              static_configs:
                - targets: ['__DISPATCHER_INT_IP__:9006']
                  labels:
                    instance: arvados-dispatch-cloud.__CLUSTER__
                    cluster: __CLUSTER__

            {%- if "__DATABASE_INT_IP__" != "" %}
            # Database (only emitted when a database address is set)
            - job_name: postgresql
              static_configs:
                - targets:
                    - '__DATABASE_INT_IP__:9187'
                    - '__DATABASE_INT_IP__:3903'
                  labels:
                    instance: database.__CLUSTER__
                    cluster: __CLUSTER__
            {%- endif %}

            # Nodes: one target per entry of the comma-separated node list,
            # using only the first dot-separated component of each name.
            - job_name: node
              static_configs:
                {%- for node in "__NODELIST__".split(',') %}
                  {%- set short_name = node.split('.')[0] %}
                - targets: ['{{ short_name }}.__DOMAIN__:9100']
                  labels:
                    instance: '{{ short_name }}.__CLUSTER__'
                    cluster: __CLUSTER__
                {%- endfor %}