Refactor the multi-host salt install page.
[arvados.git] / services / keepstore / metrics.go
index 80194eb82447828bc46ddb311c6d120f580eab37..d04601fbec84128ff47cf65ea15588aa6212b9c5 100644 (file)
@@ -2,12 +2,11 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepstore
 
 import (
        "fmt"
 
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
 )
 
@@ -20,7 +19,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
                prometheus.GaugeOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "bufferpool_bytes_allocated",
+                       Name:      "bufferpool_allocated_bytes",
                        Help:      "Number of bytes allocated to buffers",
                },
                func() float64 { return float64(b.Alloc()) },
@@ -29,7 +28,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
                prometheus.GaugeOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "bufferpool_buffers_max",
+                       Name:      "bufferpool_max_buffers",
                        Help:      "Maximum number of buffers allowed",
                },
                func() float64 { return float64(b.Cap()) },
@@ -38,7 +37,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
                prometheus.GaugeOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "bufferpool_buffers_in_use",
+                       Name:      "bufferpool_inuse_buffers",
                        Help:      "Number of buffers in use",
                },
                func() float64 { return float64(b.Len()) },
@@ -50,7 +49,7 @@ func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
                prometheus.GaugeOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      fmt.Sprintf("%s_queue_in_progress", qName),
+                       Name:      fmt.Sprintf("%s_queue_inprogress_entries", qName),
                        Help:      fmt.Sprintf("Number of %s requests in progress", qName),
                },
                func() float64 { return float64(getWorkQueueStatus(q).InProgress) },
@@ -59,225 +58,58 @@ func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
                prometheus.GaugeOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      fmt.Sprintf("%s_queue_queued", qName),
+                       Name:      fmt.Sprintf("%s_queue_pending_entries", qName),
                        Help:      fmt.Sprintf("Number of queued %s requests", qName),
                },
                func() float64 { return float64(getWorkQueueStatus(q).Queued) },
        ))
 }
 
-func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) {
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "requests_current",
-                       Help:      "Number of requests in progress",
-               },
-               func() float64 { return float64(rc.Current()) },
-       ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "requests_max",
-                       Help:      "Maximum number of concurrent requests",
-               },
-               func() float64 { return float64(rc.Max()) },
-       ))
-}
-
 type volumeMetricsVecs struct {
-       reg        *prometheus.Registry
-       BytesFree  *prometheus.GaugeVec
-       BytesUsed  *prometheus.GaugeVec
-       Errors     *prometheus.CounterVec
-       Ops        *prometheus.CounterVec
-       CompareOps *prometheus.CounterVec
-       GetOps     *prometheus.CounterVec
-       PutOps     *prometheus.CounterVec
-       TouchOps   *prometheus.CounterVec
-       InBytes    *prometheus.CounterVec
-       OutBytes   *prometheus.CounterVec
-       ErrorCodes *prometheus.CounterVec
-}
-
-type volumeMetrics struct {
-       reg              *prometheus.Registry
-       lbls             []string
-       internalCounters map[string]*prometheus.CounterVec
-       BytesFree        prometheus.Gauge
-       BytesUsed        prometheus.Gauge
-       Errors           prometheus.Counter
-       Ops              prometheus.Counter
-       CompareOps       prometheus.Counter
-       GetOps           prometheus.Counter
-       PutOps           prometheus.Counter
-       TouchOps         prometheus.Counter
-       InBytes          prometheus.Counter
-       OutBytes         prometheus.Counter
-       ErrorCodes       *prometheus.CounterVec
+       ioBytes     *prometheus.CounterVec
+       errCounters *prometheus.CounterVec
+       opsCounters *prometheus.CounterVec
 }
 
 func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs {
-       m := &volumeMetricsVecs{
-               reg: reg,
-       }
-       m.BytesFree = prometheus.NewGaugeVec(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_bytes_free",
-                       Help:      "Number of free bytes on the volume",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.BytesFree)
-       m.BytesUsed = prometheus.NewGaugeVec(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_bytes_used",
-                       Help:      "Number of used bytes on the volume",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.BytesUsed)
-       m.Errors = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_errors",
-                       Help:      "Number of volume I/O errors",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.Errors)
-       m.Ops = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_ops",
-                       Help:      "Number of volume I/O operations",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.Ops)
-       m.CompareOps = prometheus.NewCounterVec(
+       m := &volumeMetricsVecs{}
+       m.opsCounters = prometheus.NewCounterVec(
                prometheus.CounterOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "volume_io_compare_ops",
-                       Help:      "Number of volume I/O compare operations",
+                       Name:      "volume_operations",
+                       Help:      "Number of volume operations",
                },
-               []string{"label", "mount_point", "device_number"},
+               []string{"device_id", "operation"},
        )
-       reg.MustRegister(m.CompareOps)
-       m.GetOps = prometheus.NewCounterVec(
+       reg.MustRegister(m.opsCounters)
+       m.errCounters = prometheus.NewCounterVec(
                prometheus.CounterOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "volume_io_get_ops",
-                       Help:      "Number of volume I/O get operations",
+                       Name:      "volume_errors",
+                       Help:      "Number of volume errors",
                },
-               []string{"label", "mount_point", "device_number"},
+               []string{"device_id", "error_type"},
        )
-       reg.MustRegister(m.GetOps)
-       m.PutOps = prometheus.NewCounterVec(
+       reg.MustRegister(m.errCounters)
+       m.ioBytes = prometheus.NewCounterVec(
                prometheus.CounterOpts{
                        Namespace: "arvados",
                        Subsystem: "keepstore",
-                       Name:      "volume_io_put_ops",
-                       Help:      "Number of volume I/O put operations",
+                       Name:      "volume_io_bytes",
+                       Help:      "Volume I/O traffic in bytes",
                },
-               []string{"label", "mount_point", "device_number"},
+               []string{"device_id", "direction"},
        )
-       reg.MustRegister(m.PutOps)
-       m.TouchOps = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_touch_ops",
-                       Help:      "Number of volume I/O touch operations",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.TouchOps)
-       m.InBytes = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_in_bytes",
-                       Help:      "Number of input bytes",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.InBytes)
-       m.OutBytes = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_out_bytes",
-                       Help:      "Number of output bytes",
-               },
-               []string{"label", "mount_point", "device_number"},
-       )
-       reg.MustRegister(m.OutBytes)
-       m.ErrorCodes = prometheus.NewCounterVec(
-               prometheus.CounterOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "volume_io_error_codes",
-                       Help:      "Number of I/O errors by error code",
-               },
-               []string{"label", "mount_point", "device_number", "error_code"},
-       )
-       reg.MustRegister(m.ErrorCodes)
+       reg.MustRegister(m.ioBytes)
 
        return m
 }
 
-func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics {
-       lbls := []string{lbl, mnt, dev}
-       curried := &volumeMetrics{
-               reg:              m.reg,
-               lbls:             lbls,
-               internalCounters: make(map[string]*prometheus.CounterVec),
-               BytesFree:        m.BytesFree.WithLabelValues(lbls...),
-               BytesUsed:        m.BytesUsed.WithLabelValues(lbls...),
-               Errors:           m.Errors.WithLabelValues(lbls...),
-               Ops:              m.Ops.WithLabelValues(lbls...),
-               CompareOps:       m.CompareOps.WithLabelValues(lbls...),
-               GetOps:           m.GetOps.WithLabelValues(lbls...),
-               PutOps:           m.PutOps.WithLabelValues(lbls...),
-               TouchOps:         m.TouchOps.WithLabelValues(lbls...),
-               InBytes:          m.InBytes.WithLabelValues(lbls...),
-               OutBytes:         m.OutBytes.WithLabelValues(lbls...),
-               ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{
-                       "label":         lbl,
-                       "mount_point":   mnt,
-                       "device_number": dev,
-               }),
-       }
-       return curried
-}
-
-// Returns a driver specific counter, creating it when needed. The 'name' argument
-// should include the driver prefix.
-func (m *volumeMetrics) getInternalCounter(name string, help string) prometheus.Counter {
-       counterVec, ok := m.internalCounters[name]
-       if !ok {
-               counterVec = prometheus.NewCounterVec(
-                       prometheus.CounterOpts{
-                               Namespace: "arvados",
-                               Subsystem: "keepstore",
-                               Name:      name,
-                               Help:      help,
-                       },
-                       []string{"label", "mount_point", "device_number"},
-               )
-               m.reg.MustRegister(counterVec)
-               m.internalCounters[name] = counterVec
-       }
-       return counterVec.WithLabelValues(m.lbls...)
+func (vm *volumeMetricsVecs) getCounterVecsFor(lbls prometheus.Labels) (opsCV, errCV, ioCV *prometheus.CounterVec) {
+       opsCV = vm.opsCounters.MustCurryWith(lbls)
+       errCV = vm.errCounters.MustCurryWith(lbls)
+       ioCV = vm.ioBytes.MustCurryWith(lbls)
+       return
 }