X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f0553505e32ee00999d1d680da14260a9a0f6b99..44ee7219e5363f85208d0813373e096d9e14c059:/services/keepstore/metrics.go diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go index f0815ae4ab..b2f0aa6638 100644 --- a/services/keepstore/metrics.go +++ b/services/keepstore/metrics.go @@ -7,208 +7,109 @@ package main import ( "fmt" - "git.curoverse.com/arvados.git/sdk/go/httpserver" "github.com/prometheus/client_golang/prometheus" ) type nodeMetrics struct { reg *prometheus.Registry - rc httpserver.RequestCounter } -func (m *nodeMetrics) setup() { +func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) { m.reg.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_bytes_allocated", + Name: "bufferpool_allocated_bytes", Help: "Number of bytes allocated to buffers", }, - func() float64 { return float64(bufs.Alloc()) }, + func() float64 { return float64(b.Alloc()) }, )) m.reg.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_buffers_max", + Name: "bufferpool_max_buffers", Help: "Maximum number of buffers allowed", }, - func() float64 { return float64(bufs.Cap()) }, + func() float64 { return float64(b.Cap()) }, )) m.reg.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "bufferpool_buffers_in_use", + Name: "bufferpool_inuse_buffers", Help: "Number of buffers in use", }, - func() float64 { return float64(bufs.Len()) }, - )) - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "pull_queue_in_progress", - Help: "Number of pull requests in progress", - }, - func() float64 { return float64(getWorkQueueStatus(pullq).InProgress) }, + func() float64 { return float64(b.Len()) }, )) +} + +func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) { m.reg.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "pull_queue_queued", - Help: "Number of queued pull requests", + Name: fmt.Sprintf("%s_queue_inprogress_entries", qName), + Help: fmt.Sprintf("Number of %s requests in progress", qName), }, - func() float64 { return float64(getWorkQueueStatus(pullq).Queued) }, + func() float64 { return float64(getWorkQueueStatus(q).InProgress) }, )) m.reg.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "trash_queue_in_progress", - Help: "Number of trash requests in progress", + Name: fmt.Sprintf("%s_queue_pending_entries", qName), + Help: fmt.Sprintf("Number of queued %s requests", qName), }, - func() float64 { return float64(getWorkQueueStatus(trashq).InProgress) }, + func() float64 { return float64(getWorkQueueStatus(q).Queued) }, )) - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ +} + +type volumeMetricsVecs struct { + ioBytes *prometheus.CounterVec + errCounters *prometheus.CounterVec + opsCounters *prometheus.CounterVec +} + +func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs { + m := &volumeMetricsVecs{} + m.opsCounters = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "trash_queue_queued", - Help: "Number of queued trash requests", + Name: "volume_operations", + Help: "Number of volume operations", }, - func() float64 { return float64(getWorkQueueStatus(trashq).Queued) }, - )) - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ + []string{"device_id", "operation"}, + ) + reg.MustRegister(m.opsCounters) + m.errCounters = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "requests_current", - Help: "Number of requests in progress", + Name: "volume_errors", + Help: "Number of volume errors", }, - func() float64 { return float64(m.rc.Current()) }, - )) - m.reg.MustRegister(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ + []string{"device_id", "error_type"}, + ) + reg.MustRegister(m.errCounters) + m.ioBytes = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepstore", - Name: "requests_max", - Help: "Maximum number of concurrent requests", + Name: "volume_io_bytes", + Help: "Volume I/O traffic in bytes", }, - func() float64 { return float64(m.rc.Max()) }, - )) - // Register individual volume's metrics - vols := KeepVM.AllReadable() - for _, vol := range vols { - labels := prometheus.Labels{ - "label": vol.String(), - "mount_point": vol.Status().MountPoint, - "device_number": fmt.Sprintf("%d", vol.Status().DeviceNum), - } - if vol, ok := vol.(InternalMetricser); ok { - // Per-driver internal metrics - vol.SetupInternalMetrics(m.reg, labels) - } - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_bytes_free", - Help: "Number of free bytes on the volume", - ConstLabels: labels, - }, - func() float64 { return float64(vol.Status().BytesFree) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_bytes_used", - Help: "Number of used bytes on the volume", - ConstLabels: labels, - }, - func() float64 { return float64(vol.Status().BytesUsed) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_errors", - Help: "Number of I/O errors", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).Errors) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_ops", - Help: "Number of I/O operations", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).Ops) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_compare_ops", - Help: "Number of I/O compare operations", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).CompareOps) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_get_ops", - Help: "Number of I/O get operations", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).GetOps) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_put_ops", - Help: "Number of I/O put operations", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).PutOps) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_touch_ops", - Help: "Number of I/O touch operations", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).TouchOps) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_input_bytes", - Help: "Number of input bytes", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).InBytes) }, - )) - m.reg.Register(prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "arvados", - Subsystem: "keepstore", - Name: "volume_io_output_bytes", - Help: "Number of output bytes", - ConstLabels: labels, - }, - func() float64 { return float64(KeepVM.VolumeStats(vol).OutBytes) }, - )) - } + []string{"device_id", "direction"}, + ) + reg.MustRegister(m.ioBytes) + + return m +} + +func (vm *volumeMetricsVecs) getCounterVecsFor(lbls prometheus.Labels) (opsCV, errCV, ioCV *prometheus.CounterVec) { + opsCV = vm.opsCounters.MustCurryWith(lbls) + errCV = vm.errCounters.MustCurryWith(lbls) + ioCV = vm.ioBytes.MustCurryWith(lbls) + return }