4 "name": "DS_PROMETHEUS",
8 "pluginId": "prometheus",
9 "pluginName": "Prometheus"
18 "uid": "${DS_PROMETHEUS}"
22 "iconColor": "rgba(0, 211, 255, 1)",
23 "name": "Annotations & Alerts",
35 "fiscalYearStartMonth": 0,
48 "uid": "${DS_PROMETHEUS}"
58 "hiddenSeries": false,
71 "nullPointMode": "null as zero",
73 "alertThreshold": true
76 "pluginVersion": "9.4.3",
82 "$$hashKey": "object:424",
85 "transform": "negative-Y"
95 "uid": "${DS_PROMETHEUS}"
97 "expr": "sum(rate(arvados_keepstore_volume_io_bytes{}[1m])) without (operation,device_id)",
99 "legendFormat": "{{ instance }} {{ direction }}",
105 "title": "Keepstore bandwidth [1m]",
109 "value_type": "individual"
119 "$$hashKey": "object:159",
125 "$$hashKey": "object:160",
141 "type": "prometheus",
142 "uid": "${DS_PROMETHEUS}"
158 "hiddenSeries": false,
171 "nullPointMode": "null as zero",
173 "alertThreshold": true
176 "pluginVersion": "9.4.3",
180 "seriesOverrides": [],
183 "steppedLine": false,
187 "type": "prometheus",
188 "uid": "${DS_PROMETHEUS}"
190 "expr": "arvados_dispatchcloud_containers_running{}",
192 "legendFormat": "# containers",
198 "title": "Containers running",
202 "value_type": "individual"
212 "$$hashKey": "object:973",
220 "$$hashKey": "object:974",
236 "type": "prometheus",
237 "uid": "${DS_PROMETHEUS}"
253 "hiddenSeries": false,
266 "nullPointMode": "null",
268 "alertThreshold": true
271 "pluginVersion": "9.4.3",
275 "seriesOverrides": [],
278 "steppedLine": false,
282 "type": "prometheus",
283 "uid": "${DS_PROMETHEUS}"
285 "expr": "sum(rate(arvados_keepstore_volume_operations{}[1m])) without (operation,device_id)",
287 "legendFormat": "{{instance}}",
293 "title": "Keepstore volume operations rate/second",
297 "value_type": "individual"
307 "$$hashKey": "object:982",
314 "$$hashKey": "object:983",
330 "type": "prometheus",
331 "uid": "${DS_PROMETHEUS}"
347 "hiddenSeries": false,
360 "nullPointMode": "null as zero",
362 "alertThreshold": true
365 "pluginVersion": "9.4.3",
369 "seriesOverrides": [],
372 "steppedLine": false,
376 "type": "prometheus",
377 "uid": "${DS_PROMETHEUS}"
379 "expr": "arvados_dispatchcloud_queue_entries{}",
381 "legendFormat": "{{instance_type}} {{state}}",
386 "type": "prometheus",
387 "uid": "${DS_PROMETHEUS}"
389 "expr": "arvados_dispatchcloud_containers_allocated_not_started{}",
391 "legendFormat": "allocated, not started",
396 "type": "prometheus",
397 "uid": "${DS_PROMETHEUS}"
399 "expr": "arvados_dispatchcloud_containers_not_allocated_over_quota{}",
401 "legendFormat": "not allocated, over quota",
407 "title": "Queue: # containers per {state, instance type}",
411 "value_type": "individual"
421 "$$hashKey": "object:4306",
428 "$$hashKey": "object:4307",
444 "type": "prometheus",
445 "uid": "${DS_PROMETHEUS}"
461 "hiddenSeries": false,
474 "nullPointMode": "null",
476 "alertThreshold": true
479 "pluginVersion": "9.4.3",
483 "seriesOverrides": [],
486 "steppedLine": false,
490 "type": "prometheus",
491 "uid": "${DS_PROMETHEUS}"
493 "expr": "arvados_keepstore_bufferpool_inuse_buffers{}",
495 "legendFormat": "{{instance}}",
501 "title": "Keepstore buffers in use",
505 "value_type": "individual"
515 "$$hashKey": "object:929",
522 "$$hashKey": "object:930",
538 "type": "prometheus",
539 "uid": "${DS_PROMETHEUS}"
549 "hiddenSeries": false,
562 "nullPointMode": "null as zero",
564 "alertThreshold": true
567 "pluginVersion": "9.4.3",
571 "seriesOverrides": [],
574 "steppedLine": false,
578 "type": "prometheus",
579 "uid": "${DS_PROMETHEUS}"
581 "expr": "arvados_dispatchcloud_containers_longest_wait_time_seconds{}",
583 "legendFormat": "Longest wait time",
588 "type": "prometheus",
589 "uid": "${DS_PROMETHEUS}"
591 "expr": "rate(arvados_dispatchcloud_containers_time_from_queue_to_crunch_run_seconds_sum{}[10m]) / rate(arvados_dispatchcloud_containers_time_from_queue_to_crunch_run_seconds_count{}[10m])",
593 "legendFormat": "avg wait time [10m]",
599 "title": "Container wait times",
603 "value_type": "individual"
613 "$$hashKey": "object:138",
620 "$$hashKey": "object:139",
636 "type": "prometheus",
637 "uid": "${DS_PROMETHEUS}"
653 "hiddenSeries": false,
666 "nullPointMode": "null",
668 "alertThreshold": true
671 "pluginVersion": "9.4.3",
675 "seriesOverrides": [],
678 "steppedLine": false,
682 "type": "prometheus",
683 "uid": "${DS_PROMETHEUS}"
685 "expr": "arvados_keep_total_bytes{}",
687 "legendFormat": "Total stored",
692 "type": "prometheus",
693 "uid": "${DS_PROMETHEUS}"
695 "expr": "arvados_keep_overreplicated_bytes{}",
697 "legendFormat": "Overreplicated",
702 "type": "prometheus",
703 "uid": "${DS_PROMETHEUS}"
705 "expr": "arvados_keep_underreplicated_bytes{}",
707 "legendFormat": "Underreplicated",
712 "type": "prometheus",
713 "uid": "${DS_PROMETHEUS}"
715 "expr": "arvados_keep_lost_bytes{}",
717 "legendFormat": "Lost",
723 "title": "Total bytes by type",
727 "value_type": "individual"
737 "$$hashKey": "object:304",
739 "format": "decbytes",
746 "$$hashKey": "object:305",
762 "type": "prometheus",
763 "uid": "${DS_PROMETHEUS}"
773 "hiddenSeries": false,
786 "nullPointMode": "null as zero",
788 "alertThreshold": true
791 "pluginVersion": "9.4.3",
795 "seriesOverrides": [],
798 "steppedLine": false,
802 "type": "prometheus",
803 "uid": "${DS_PROMETHEUS}"
805 "expr": "rate(arvados_dispatchcloud_instances_time_to_ssh_seconds_sum{}[10m]) / rate(arvados_dispatchcloud_instances_time_to_ssh_seconds_count{}[10m])",
808 "legendFormat": "ssh",
813 "type": "prometheus",
814 "uid": "${DS_PROMETHEUS}"
816 "expr": "rate(arvados_dispatchcloud_instances_time_to_ready_for_container_seconds_sum{}[10m]) / rate(arvados_dispatchcloud_instances_time_to_ready_for_container_seconds_count{}[10m])",
818 "legendFormat": "ready",
824 "title": "Instance time to ... avg [10m]",
828 "value_type": "individual"
838 "$$hashKey": "object:113",
845 "$$hashKey": "object:114",
861 "type": "prometheus",
862 "uid": "${DS_PROMETHEUS}"
872 "hiddenSeries": false,
885 "nullPointMode": "null",
887 "alertThreshold": true
890 "pluginVersion": "9.4.3",
894 "seriesOverrides": [],
897 "steppedLine": false,
901 "type": "prometheus",
902 "uid": "${DS_PROMETHEUS}"
904 "expr": "arvados_concurrent_requests{}",
906 "legendFormat": "{{instance}}",
912 "title": "Concurrent requests",
916 "value_type": "individual"
926 "$$hashKey": "object:109",
933 "$$hashKey": "object:110",
949 "type": "prometheus",
950 "uid": "${DS_PROMETHEUS}"
966 "hiddenSeries": false,
979 "nullPointMode": "null as zero",
981 "alertThreshold": true
984 "pluginVersion": "9.4.3",
988 "seriesOverrides": [],
991 "steppedLine": false,
995 "type": "prometheus",
996 "uid": "${DS_PROMETHEUS}"
998 "expr": "arvados_dispatchcloud_boot_outcomes{}",
1000 "legendFormat": "{{outcome}}",
1006 "title": "Boot outcomes",
1010 "value_type": "individual"
1020 "$$hashKey": "object:921",
1027 "$$hashKey": "object:922",
1043 "type": "prometheus",
1044 "uid": "${DS_PROMETHEUS}"
1060 "hiddenSeries": false,
1073 "nullPointMode": "null as zero",
1075 "alertThreshold": true
1077 "percentage": false,
1078 "pluginVersion": "9.4.3",
1082 "seriesOverrides": [],
1085 "steppedLine": false,
1089 "type": "prometheus",
1090 "uid": "${DS_PROMETHEUS}"
1092 "expr": "sum(arvados_dispatchcloud_instances_price{})",
1094 "intervalFactor": 10,
1095 "legendFormat": "cost ($)",
1105 "value_type": "individual"
1115 "$$hashKey": "object:623",
1117 "label": "$ / hour",
1123 "$$hashKey": "object:624",
1139 "type": "prometheus",
1140 "uid": "${DS_PROMETHEUS}"
1156 "hiddenSeries": false,
1169 "nullPointMode": "null as zero",
1171 "alertThreshold": true
1173 "percentage": false,
1174 "pluginVersion": "9.4.3",
1178 "seriesOverrides": [],
1181 "steppedLine": false,
1185 "type": "prometheus",
1186 "uid": "${DS_PROMETHEUS}"
1188 "expr": "arvados_dispatchcloud_instances_disappeared{}",
1190 "legendFormat": "{{state}}",
1196 "title": "Instance state before disappearance",
1200 "value_type": "individual"
1210 "$$hashKey": "object:1025",
1217 "$$hashKey": "object:1026",
1233 "type": "prometheus",
1234 "uid": "${DS_PROMETHEUS}"
1250 "hiddenSeries": false,
1263 "nullPointMode": "null as zero",
1264 "percentage": false,
1265 "pluginVersion": "8.4.5",
1269 "seriesOverrides": [],
1272 "steppedLine": false,
1276 "type": "prometheus",
1277 "uid": "${DS_PROMETHEUS}"
1279 "expr": "arvados_dispatchcloud_instances_price{}",
1281 "intervalFactor": 10,
1282 "legendFormat": "{{category}}",
1288 "title": "Cost by node state",
1292 "value_type": "individual"
1302 "$$hashKey": "object:574",
1304 "label": "$ / hour",
1310 "$$hashKey": "object:575",
1326 "type": "prometheus",
1327 "uid": "${DS_PROMETHEUS}"
1337 "hiddenSeries": false,
1350 "nullPointMode": "null as zero",
1351 "percentage": false,
1352 "pluginVersion": "8.4.5",
1356 "seriesOverrides": [],
1359 "steppedLine": false,
1363 "type": "prometheus",
1364 "uid": "${DS_PROMETHEUS}"
1366 "expr": "rate(arvados_dispatchcloud_instances_time_from_shutdown_request_to_disappearance_seconds_sum{}[10m]) / rate(arvados_dispatchcloud_instances_time_from_shutdown_request_to_disappearance_seconds_count{}[10m])",
1368 "legendFormat": "shutdown to disappearance",
1374 "title": "Instance time from shutdown to disappearance avg [10m]",
1378 "value_type": "individual"
1388 "$$hashKey": "object:450",
1395 "$$hashKey": "object:451",
1411 "type": "prometheus",
1412 "uid": "${DS_PROMETHEUS}"
1428 "hiddenSeries": false,
1441 "nullPointMode": "null as zero",
1442 "percentage": false,
1443 "pluginVersion": "8.4.5",
1447 "seriesOverrides": [],
1450 "steppedLine": false,
1454 "type": "prometheus",
1455 "uid": "${DS_PROMETHEUS}"
1457 "expr": "arvados_dispatchcloud_instances_total{}",
1460 "legendFormat": "{{instance_type}} : {{category}}",
1466 "$$hashKey": "object:540",
1467 "colorMode": "critical",
1475 "title": "Nodes by state",
1479 "value_type": "individual"
1489 "$$hashKey": "object:723",
1496 "$$hashKey": "object:724",
1512 "type": "prometheus",
1513 "uid": "${DS_PROMETHEUS}"
1523 "hiddenSeries": false,
1536 "nullPointMode": "null as zero",
1537 "percentage": false,
1538 "pluginVersion": "8.4.5",
1542 "seriesOverrides": [],
1545 "steppedLine": false,
1549 "type": "prometheus",
1550 "uid": "${DS_PROMETHEUS}"
1552 "expr": "rate(arvados_dispatchcloud_instances_run_probe_duration_seconds_sum{}[10m]) / rate(arvados_dispatchcloud_instances_run_probe_duration_seconds_count{}[10m])",
1554 "legendFormat": "{{outcome}}",
1560 "title": "Run probe duration avg [10m]",
1564 "value_type": "individual"
1574 "$$hashKey": "object:125",
1581 "$$hashKey": "object:126",
1597 "type": "prometheus",
1598 "uid": "${DS_PROMETHEUS}"
1608 "hiddenSeries": false,
1621 "nullPointMode": "null",
1622 "percentage": false,
1623 "pluginVersion": "8.4.5",
1627 "seriesOverrides": [],
1630 "steppedLine": false,
1634 "type": "prometheus",
1635 "uid": "${DS_PROMETHEUS}"
1637 "expr": "delta(arvados_dispatchcloud_instances_run_probe_duration_seconds_count{}[1m])",
1640 "legendFormat": "{{outcome}}",
1646 "title": "Run probe count by outcome -- delta [1m]",
1650 "value_type": "individual"
1660 "$$hashKey": "object:149",
1667 "$$hashKey": "object:150",
1680 "schemaVersion": 38,
1691 "refresh_intervals": [
1704 "title": "Arvados cluster overview",
1705 "uid": "ArvadosClusterOverviewDashboard",