Refactor the multi-host salt install page.
[arvados.git] / services / keepstore / metrics.go
index f0815ae4ab3f6171f6aaa99e5bc6d70170830967..d04601fbec84128ff47cf65ea15588aa6212b9c5 100644 (file)
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepstore
 
 import (
        "fmt"
 
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
 )
 
 type nodeMetrics struct { // holds the registry used by the setup*Metrics methods in this file
        reg *prometheus.Registry // registry on which the gauges are registered
-       rc  httpserver.RequestCounter
 }
 
-func (m *nodeMetrics) setup() {
+func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) { // registers three gauges on m.reg reporting b.Alloc(), b.Cap() and b.Len()
         m.reg.MustRegister(prometheus.NewGaugeFunc( // Must* variant: panics rather than returning an error (per client_golang docs)
                 prometheus.GaugeOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "bufferpool_bytes_allocated",
+                       Name:      "bufferpool_allocated_bytes",
                         Help:      "Number of bytes allocated to buffers",
                 },
-               func() float64 { return float64(bufs.Alloc()) },
+               func() float64 { return float64(b.Alloc()) }, // callback reads the pool passed in as b
         ))
         m.reg.MustRegister(prometheus.NewGaugeFunc(
                 prometheus.GaugeOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "bufferpool_buffers_max",
+                       Name:      "bufferpool_max_buffers",
                         Help:      "Maximum number of buffers allowed",
                 },
-               func() float64 { return float64(bufs.Cap()) },
+               func() float64 { return float64(b.Cap()) },
         ))
         m.reg.MustRegister(prometheus.NewGaugeFunc(
                 prometheus.GaugeOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "bufferpool_buffers_in_use",
+                       Name:      "bufferpool_inuse_buffers",
                         Help:      "Number of buffers in use",
                 },
-               func() float64 { return float64(bufs.Len()) },
-       ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "pull_queue_in_progress",
-                       Help:      "Number of pull requests in progress",
-               },
-               func() float64 { return float64(getWorkQueueStatus(pullq).InProgress) },
+               func() float64 { return float64(b.Len()) },
         ))
+}
+
+func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) { // registers in-progress and queued gauges for q on m.reg; qName prefixes the metric names
         m.reg.MustRegister(prometheus.NewGaugeFunc(
                 prometheus.GaugeOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "pull_queue_queued",
-                       Help:      "Number of queued pull requests",
+                       Name:      fmt.Sprintf("%s_queue_inprogress_entries", qName), // e.g. qName "pull" yields "pull_queue_inprogress_entries"
+                       Help:      fmt.Sprintf("Number of %s requests in progress", qName),
                 },
-               func() float64 { return float64(getWorkQueueStatus(pullq).Queued) },
+               func() float64 { return float64(getWorkQueueStatus(q).InProgress) }, // callback reports the InProgress field of q's status
         ))
         m.reg.MustRegister(prometheus.NewGaugeFunc(
                 prometheus.GaugeOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "trash_queue_in_progress",
-                       Help:      "Number of trash requests in progress",
+                       Name:      fmt.Sprintf("%s_queue_pending_entries", qName), // "pending" in the metric name corresponds to the Queued status field below
+                       Help:      fmt.Sprintf("Number of queued %s requests", qName),
                 },
-               func() float64 { return float64(getWorkQueueStatus(trashq).InProgress) },
+               func() float64 { return float64(getWorkQueueStatus(q).Queued) },
         ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
+}
+
+type volumeMetricsVecs struct { // bundles the three counter vectors built by newVolumeMetricsVecs
+       ioBytes     *prometheus.CounterVec // "volume_io_bytes"; labels: device_id, direction
+       errCounters *prometheus.CounterVec // "volume_errors"; labels: device_id, error_type
+       opsCounters *prometheus.CounterVec // "volume_operations"; labels: device_id, operation
+}
+
+func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs { // creates the three volume counter vectors, registers each on reg, and returns them
+       m := &volumeMetricsVecs{}
+       m.opsCounters = prometheus.NewCounterVec(
+               prometheus.CounterOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "trash_queue_queued",
-                       Help:      "Number of queued trash requests",
+                       Name:      "volume_operations", // NOTE(review): Prometheus naming guidance recommends a _total suffix on counters — confirm before renaming
+                       Help:      "Number of volume operations",
                 },
-               func() float64 { return float64(getWorkQueueStatus(trashq).Queued) },
-       ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
+               []string{"device_id", "operation"}, // label names for this vector
+       )
+       reg.MustRegister(m.opsCounters) // Must* variant: panics rather than returning an error (per client_golang docs)
+       m.errCounters = prometheus.NewCounterVec(
+               prometheus.CounterOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "requests_current",
-                       Help:      "Number of requests in progress",
+                       Name:      "volume_errors",
+                       Help:      "Number of volume errors",
                 },
-               func() float64 { return float64(m.rc.Current()) },
-       ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
+               []string{"device_id", "error_type"}, // label names for this vector
+       )
+       reg.MustRegister(m.errCounters)
+       m.ioBytes = prometheus.NewCounterVec(
+               prometheus.CounterOpts{
                         Namespace: "arvados",
                         Subsystem: "keepstore",
-                       Name:      "requests_max",
-                       Help:      "Maximum number of concurrent requests",
+                       Name:      "volume_io_bytes",
+                       Help:      "Volume I/O traffic in bytes",
                 },
-               func() float64 { return float64(m.rc.Max()) },
-       ))
-       // Register individual volume's metrics
-       vols := KeepVM.AllReadable()
-       for _, vol := range vols {
-               labels := prometheus.Labels{
-                       "label":         vol.String(),
-                       "mount_point":   vol.Status().MountPoint,
-                       "device_number": fmt.Sprintf("%d", vol.Status().DeviceNum),
-               }
-               if vol, ok := vol.(InternalMetricser); ok {
-                       // Per-driver internal metrics
-                       vol.SetupInternalMetrics(m.reg, labels)
-               }
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_bytes_free",
-                               Help:        "Number of free bytes on the volume",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(vol.Status().BytesFree) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_bytes_used",
-                               Help:        "Number of used bytes on the volume",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(vol.Status().BytesUsed) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_errors",
-                               Help:        "Number of I/O errors",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).Errors) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_ops",
-                               Help:        "Number of I/O operations",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).Ops) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_compare_ops",
-                               Help:        "Number of I/O compare operations",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).CompareOps) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_get_ops",
-                               Help:        "Number of I/O get operations",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).GetOps) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_put_ops",
-                               Help:        "Number of I/O put operations",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).PutOps) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_touch_ops",
-                               Help:        "Number of I/O touch operations",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).TouchOps) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_input_bytes",
-                               Help:        "Number of input bytes",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).InBytes) },
-               ))
-               m.reg.Register(prometheus.NewGaugeFunc(
-                       prometheus.GaugeOpts{
-                               Namespace:   "arvados",
-                               Subsystem:   "keepstore",
-                               Name:        "volume_io_output_bytes",
-                               Help:        "Number of output bytes",
-                               ConstLabels: labels,
-                       },
-                       func() float64 { return float64(KeepVM.VolumeStats(vol).OutBytes) },
-               ))
-       }
+               []string{"device_id", "direction"}, // label names for this vector
+       )
+       reg.MustRegister(m.ioBytes)
+
+       return m
+}
+
+func (vm *volumeMetricsVecs) getCounterVecsFor(lbls prometheus.Labels) (opsCV, errCV, ioCV *prometheus.CounterVec) { // returns the ops, error and I/O-bytes counter vectors, each curried with lbls
+       opsCV = vm.opsCounters.MustCurryWith(lbls) // Must* variant: panics rather than returning an error (per client_golang docs)
+       errCV = vm.errCounters.MustCurryWith(lbls)
+       ioCV = vm.ioBytes.MustCurryWith(lbls)
+       return // naked return of the three named results assigned above
 }