X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3a3d67ccee068a85aa3b79c5abd40170223071e3..fa9a17bf1ea10d9130188730a1aa160e89daaa13:/services/keepstore/metrics.go diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go index 63e406c930..b2f0aa6638 100644 --- a/services/keepstore/metrics.go +++ b/services/keepstore/metrics.go @@ -7,7 +7,6 @@ package main import ( "fmt" - "git.curoverse.com/arvados.git/sdk/go/httpserver" "github.com/prometheus/client_golang/prometheus" ) @@ -20,7 +19,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) { prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_bytes_allocated", + Name: "bufferpool_allocated_bytes", Help: "Number of bytes allocated to buffers", }, func() float64 { return float64(b.Alloc()) }, @@ -29,7 +28,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) { prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_buffers_max", + Name: "bufferpool_max_buffers", Help: "Maximum number of buffers allowed", }, func() float64 { return float64(b.Cap()) }, @@ -38,7 +37,7 @@ func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) { prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_buffers_in_use", + Name: "bufferpool_inuse_buffers", Help: "Number of buffers in use", }, func() float64 { return float64(b.Len()) }, @@ -50,7 +49,7 @@ func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) { prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: fmt.Sprintf("%s_queue_in_progress", qName), + Name: fmt.Sprintf("%s_queue_inprogress_entries", qName), Help: fmt.Sprintf("Number of %s requests in progress", qName), }, func() float64 { return float64(getWorkQueueStatus(q).InProgress) }, @@ -59,196 +58,58 @@ func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) { prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: fmt.Sprintf("%s_queue_queued", qName), + Name: fmt.Sprintf("%s_queue_pending_entries", qName), Help: fmt.Sprintf("Number of queued %s requests", qName), }, func() float64 { return float64(getWorkQueueStatus(q).Queued) }, )) } -func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) { - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "requests_current", - Help: "Number of requests in progress", - }, - func() float64 { return float64(rc.Current()) }, - )) - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "requests_max", - Help: "Maximum number of concurrent requests", - }, - func() float64 { return float64(rc.Max()) }, - )) -} - type volumeMetricsVecs struct { - BytesFree *prometheus.GaugeVec - BytesUsed *prometheus.GaugeVec - Errors *prometheus.CounterVec - Ops *prometheus.CounterVec - CompareOps *prometheus.CounterVec - GetOps *prometheus.CounterVec - PutOps *prometheus.CounterVec - TouchOps *prometheus.CounterVec - InBytes *prometheus.CounterVec - OutBytes *prometheus.CounterVec - ErrorCodes *prometheus.CounterVec -} - -type volumeMetrics struct { - BytesFree prometheus.Gauge - BytesUsed prometheus.Gauge - Errors prometheus.Counter - Ops prometheus.Counter - CompareOps prometheus.Counter - GetOps prometheus.Counter - PutOps prometheus.Counter - TouchOps prometheus.Counter - InBytes prometheus.Counter - OutBytes prometheus.Counter - ErrorCodes *prometheus.CounterVec + ioBytes *prometheus.CounterVec + errCounters *prometheus.CounterVec + opsCounters *prometheus.CounterVec } func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs { m := &volumeMetricsVecs{} - m.BytesFree = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_bytes_free", - Help: "Number of free bytes on the volume", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.BytesFree) - m.BytesUsed = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_bytes_used", - Help: "Number of used bytes on the volume", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.BytesUsed) - m.Errors = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_errors", - Help: "Number of volume I/O errors", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.Errors) - m.Ops = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_ops", - Help: "Number of volume I/O operations", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.Ops) - m.CompareOps = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_compare_ops", - Help: "Number of volume I/O compare operations", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.CompareOps) - m.GetOps = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_get_ops", - Help: "Number of volume I/O get operations", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.GetOps) - m.PutOps = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_put_ops", - Help: "Number of volume I/O put operations", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.PutOps) - m.TouchOps = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_touch_ops", - Help: "Number of volume I/O touch operations", - }, - []string{"label", "mount_point", "device_number"}, - ) - reg.MustRegister(m.TouchOps) - m.InBytes = prometheus.NewCounterVec( + m.opsCounters = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "volume_io_in_bytes", - Help: "Number of input bytes", + Name: "volume_operations", + Help: "Number of volume operations", }, - []string{"label", "mount_point", "device_number"}, + []string{"device_id", "operation"}, ) - reg.MustRegister(m.InBytes) - m.OutBytes = prometheus.NewCounterVec( + reg.MustRegister(m.opsCounters) + m.errCounters = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "volume_io_out_bytes", - Help: "Number of output bytes", + Name: "volume_errors", + Help: "Number of volume errors", }, - []string{"label", "mount_point", "device_number"}, + []string{"device_id", "error_type"}, ) - reg.MustRegister(m.OutBytes) - m.ErrorCodes = prometheus.NewCounterVec( + reg.MustRegister(m.errCounters) + m.ioBytes = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "volume_io_error_codes", - Help: "Number of I/O errors by error code", + Name: "volume_io_bytes", + Help: "Volume I/O traffic in bytes", }, - []string{"label", "mount_point", "device_number", "error_code"}, + []string{"device_id", "direction"}, ) - reg.MustRegister(m.ErrorCodes) + reg.MustRegister(m.ioBytes) return m } -func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics { - lbls := []string{lbl, mnt, dev} - curried := &volumeMetrics{ - BytesFree: m.BytesFree.WithLabelValues(lbls...), - BytesUsed: m.BytesUsed.WithLabelValues(lbls...), - Errors: m.Errors.WithLabelValues(lbls...), - Ops: m.Ops.WithLabelValues(lbls...), - CompareOps: m.CompareOps.WithLabelValues(lbls...), - GetOps: m.GetOps.WithLabelValues(lbls...), - PutOps: m.PutOps.WithLabelValues(lbls...), - TouchOps: m.TouchOps.WithLabelValues(lbls...), - InBytes: m.InBytes.WithLabelValues(lbls...), - OutBytes: m.OutBytes.WithLabelValues(lbls...), - ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{ - "label": lbl, - "mount_point": mnt, - "device_number": dev, - }), - } - return curried +func (vm *volumeMetricsVecs) getCounterVecsFor(lbls prometheus.Labels) (opsCV, errCV, ioCV *prometheus.CounterVec) { + opsCV = vm.opsCounters.MustCurryWith(lbls) + errCV = vm.errCounters.MustCurryWith(lbls) + ioCV = vm.ioBytes.MustCurryWith(lbls) + return }