20894: Change default retention on prometheus to 90 days
[arvados.git] / tools / salt-install / config_examples / multi_host / aws / pillars / prometheus_server.sls
index 7b4a09f50c609e5430ee964070250720d87552b7..650553de74bfe566f0f382bd6565a65567120486 100644 (file)
@@ -3,6 +3,9 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- set controller_nodes = "__CONTROLLER_NODES__".split(',') %}
+{%- set enable_balancer = ("__ENABLE_BALANCER__"|to_bool) %}
+
 ### PROMETHEUS
 prometheus:
   wanted:
@@ -18,6 +21,10 @@ prometheus:
           global:
             scrape_interval: 15s
             evaluation_interval: 15s
+          storage:  # sibling of global/rule_files, so it must share their indentation
+            tsdb:
+              retention:  # NOTE(review): confirm the deployed Prometheus reads retention from its config file
+                time: 90d  # keep 90 days of metrics
           rule_files:
             - rules.yml
 
@@ -36,30 +43,43 @@ prometheus:
               bearer_token: __MANAGEMENT_TOKEN__
               scheme: https
               static_configs:
-                - targets: ['ws.__CLUSTER__.__DOMAIN__:443']
+                - targets: ['ws.__DOMAIN__:443']
                   labels:
                     instance: ws.__CLUSTER__
                     cluster: __CLUSTER__
             - job_name: arvados_controller
               bearer_token: __MANAGEMENT_TOKEN__
+              {%- if enable_balancer %}
+              scheme: http
+              {%- else %}
               scheme: https
+              {%- endif %}
               static_configs:
-                - targets: ['__CLUSTER__.__DOMAIN__:443']
+                {%- if enable_balancer %}
+                  {%- for controller in controller_nodes %}
+                - targets: ['{{ controller }}']  # one static_config entry per item in controller_nodes
+                  labels:
+                    instance: "{{ controller.split('.')[0] }}.__CLUSTER__"  # text before the first '.' of the node name
+                    cluster: __CLUSTER__
+                  {%- endfor %}
+                {%- else %}
+                - targets: ['__DOMAIN__:443']
                   labels:
                     instance: controller.__CLUSTER__
                     cluster: __CLUSTER__
+                {%- endif %}
             - job_name: keep_web
               bearer_token: __MANAGEMENT_TOKEN__
               scheme: https
               static_configs:
-                - targets: ['keep.__CLUSTER__.__DOMAIN__:443']
+                - targets: ['keep.__DOMAIN__:443']
                   labels:
                     instance: keep-web.__CLUSTER__
                     cluster: __CLUSTER__
             - job_name: keep_balance
               bearer_token: __MANAGEMENT_TOKEN__
               static_configs:
-                - targets: ['__CONTROLLER_INT_IP__:9005']
+                - targets: ['__KEEPBALANCE_INT_IP__:9005']
                   labels:
                     instance: keep-balance.__CLUSTER__
                     cluster: __CLUSTER__
@@ -73,7 +93,7 @@ prometheus:
             - job_name: arvados_dispatch_cloud
               bearer_token: __MANAGEMENT_TOKEN__
               static_configs:
-                - targets: ['__CONTROLLER_INT_IP__:9006']
+                - targets: ['__DISPATCHER_INT_IP__:9006']
                   labels:
                     instance: arvados-dispatch-cloud.__CLUSTER__
                     cluster: __CLUSTER__
@@ -90,15 +110,15 @@ prometheus:
                     cluster: __CLUSTER__
 
             # Nodes
+            {%- set node_list = "__NODELIST__".split(',') %}
+            {%- set nodes = [] %}
+            {%- for node in node_list %}
+              {%- set _ = nodes.append(node.split('.')[0]) %}
+            {%- endfor %}
             - job_name: node
               static_configs:
-                {% for node in [
-                  'controller',
-                  'keep0',
-                  'workbench',
-                  'shell',
-                ] %}
-                - targets: [ "{{ node }}.__CLUSTER__.__DOMAIN__:9100" ]
+                {% for node in nodes %}
+                - targets: [ "{{ node }}.__DOMAIN__:9100" ]
                   labels:
                     instance: "{{ node }}.__CLUSTER__"
                     cluster: __CLUSTER__