13937: Refactors approach to pass volume metrics as curried vecs (WIP)
[arvados.git] / services / keepstore / metrics.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "fmt"
9
10         "git.curoverse.com/arvados.git/sdk/go/httpserver"
11         "github.com/prometheus/client_golang/prometheus"
12 )
13
14 type nodeMetrics struct {
15         reg *prometheus.Registry
16 }
17
18 func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
19         m.reg.MustRegister(prometheus.NewGaugeFunc(
20                 prometheus.GaugeOpts{
21                         Namespace: "arvados",
22                         Subsystem: "keepstore",
23                         Name:      "bufferpool_bytes_allocated",
24                         Help:      "Number of bytes allocated to buffers",
25                 },
26                 func() float64 { return float64(b.Alloc()) },
27         ))
28         m.reg.MustRegister(prometheus.NewGaugeFunc(
29                 prometheus.GaugeOpts{
30                         Namespace: "arvados",
31                         Subsystem: "keepstore",
32                         Name:      "bufferpool_buffers_max",
33                         Help:      "Maximum number of buffers allowed",
34                 },
35                 func() float64 { return float64(b.Cap()) },
36         ))
37         m.reg.MustRegister(prometheus.NewGaugeFunc(
38                 prometheus.GaugeOpts{
39                         Namespace: "arvados",
40                         Subsystem: "keepstore",
41                         Name:      "bufferpool_buffers_in_use",
42                         Help:      "Number of buffers in use",
43                 },
44                 func() float64 { return float64(b.Len()) },
45         ))
46 }
47
48 func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
49         m.reg.MustRegister(prometheus.NewGaugeFunc(
50                 prometheus.GaugeOpts{
51                         Namespace: "arvados",
52                         Subsystem: "keepstore",
53                         Name:      fmt.Sprintf("%s_queue_in_progress", qName),
54                         Help:      fmt.Sprintf("Number of %s requests in progress", qName),
55                 },
56                 func() float64 { return float64(getWorkQueueStatus(q).InProgress) },
57         ))
58         m.reg.MustRegister(prometheus.NewGaugeFunc(
59                 prometheus.GaugeOpts{
60                         Namespace: "arvados",
61                         Subsystem: "keepstore",
62                         Name:      fmt.Sprintf("%s_queue_queued", qName),
63                         Help:      fmt.Sprintf("Number of queued %s requests", qName),
64                 },
65                 func() float64 { return float64(getWorkQueueStatus(q).Queued) },
66         ))
67 }
68
69 func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) {
70         m.reg.MustRegister(prometheus.NewGaugeFunc(
71                 prometheus.GaugeOpts{
72                         Namespace: "arvados",
73                         Subsystem: "keepstore",
74                         Name:      "requests_current",
75                         Help:      "Number of requests in progress",
76                 },
77                 func() float64 { return float64(rc.Current()) },
78         ))
79         m.reg.MustRegister(prometheus.NewGaugeFunc(
80                 prometheus.GaugeOpts{
81                         Namespace: "arvados",
82                         Subsystem: "keepstore",
83                         Name:      "requests_max",
84                         Help:      "Maximum number of concurrent requests",
85                 },
86                 func() float64 { return float64(rc.Max()) },
87         ))
88 }
89
90 type volumeMetricsVecs struct {
91         BytesFree  *prometheus.GaugeVec
92         BytesUsed  *prometheus.GaugeVec
93         Errors     *prometheus.CounterVec
94         Ops        *prometheus.CounterVec
95         CompareOps *prometheus.CounterVec
96         GetOps     *prometheus.CounterVec
97         PutOps     *prometheus.CounterVec
98         TouchOps   *prometheus.CounterVec
99         InBytes    *prometheus.CounterVec
100         OutBytes   *prometheus.CounterVec
101         ErrorCodes *prometheus.CounterVec
102 }
103
104 type volumeMetrics struct {
105         BytesFree  prometheus.Gauge
106         BytesUsed  prometheus.Gauge
107         Errors     prometheus.Counter
108         Ops        prometheus.Counter
109         CompareOps prometheus.Counter
110         GetOps     prometheus.Counter
111         PutOps     prometheus.Counter
112         TouchOps   prometheus.Counter
113         InBytes    prometheus.Counter
114         OutBytes   prometheus.Counter
115         ErrorCodes *prometheus.CounterVec
116 }
117
118 func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs {
119         m := &volumeMetricsVecs{}
120         m.BytesFree = prometheus.NewGaugeVec(
121                 prometheus.GaugeOpts{
122                         Namespace: "arvados",
123                         Subsystem: "keepstore",
124                         Name:      "volume_bytes_free",
125                         Help:      "Number of free bytes on the volume",
126                 },
127                 []string{"label", "mount_point", "device_number"},
128         )
129         reg.MustRegister(m.BytesFree)
130         m.BytesUsed = prometheus.NewGaugeVec(
131                 prometheus.GaugeOpts{
132                         Namespace: "arvados",
133                         Subsystem: "keepstore",
134                         Name:      "volume_bytes_used",
135                         Help:      "Number of used bytes on the volume",
136                 },
137                 []string{"label", "mount_point", "device_number"},
138         )
139         reg.MustRegister(m.BytesUsed)
140         m.Errors = prometheus.NewCounterVec(
141                 prometheus.CounterOpts{
142                         Namespace: "arvados",
143                         Subsystem: "keepstore",
144                         Name:      "volume_io_errors",
145                         Help:      "Number of volume I/O errors",
146                 },
147                 []string{"label", "mount_point", "device_number"},
148         )
149         reg.MustRegister(m.Errors)
150         m.Ops = prometheus.NewCounterVec(
151                 prometheus.CounterOpts{
152                         Namespace: "arvados",
153                         Subsystem: "keepstore",
154                         Name:      "volume_io_ops",
155                         Help:      "Number of volume I/O operations",
156                 },
157                 []string{"label", "mount_point", "device_number"},
158         )
159         reg.MustRegister(m.Ops)
160         m.CompareOps = prometheus.NewCounterVec(
161                 prometheus.CounterOpts{
162                         Namespace: "arvados",
163                         Subsystem: "keepstore",
164                         Name:      "volume_io_compare_ops",
165                         Help:      "Number of volume I/O compare operations",
166                 },
167                 []string{"label", "mount_point", "device_number"},
168         )
169         reg.MustRegister(m.CompareOps)
170         m.GetOps = prometheus.NewCounterVec(
171                 prometheus.CounterOpts{
172                         Namespace: "arvados",
173                         Subsystem: "keepstore",
174                         Name:      "volume_io_get_ops",
175                         Help:      "Number of volume I/O get operations",
176                 },
177                 []string{"label", "mount_point", "device_number"},
178         )
179         reg.MustRegister(m.GetOps)
180         m.PutOps = prometheus.NewCounterVec(
181                 prometheus.CounterOpts{
182                         Namespace: "arvados",
183                         Subsystem: "keepstore",
184                         Name:      "volume_io_put_ops",
185                         Help:      "Number of volume I/O put operations",
186                 },
187                 []string{"label", "mount_point", "device_number"},
188         )
189         reg.MustRegister(m.PutOps)
190         m.TouchOps = prometheus.NewCounterVec(
191                 prometheus.CounterOpts{
192                         Namespace: "arvados",
193                         Subsystem: "keepstore",
194                         Name:      "volume_io_touch_ops",
195                         Help:      "Number of volume I/O touch operations",
196                 },
197                 []string{"label", "mount_point", "device_number"},
198         )
199         reg.MustRegister(m.TouchOps)
200         m.InBytes = prometheus.NewCounterVec(
201                 prometheus.CounterOpts{
202                         Namespace: "arvados",
203                         Subsystem: "keepstore",
204                         Name:      "volume_io_in_bytes",
205                         Help:      "Number of input bytes",
206                 },
207                 []string{"label", "mount_point", "device_number"},
208         )
209         reg.MustRegister(m.InBytes)
210         m.OutBytes = prometheus.NewCounterVec(
211                 prometheus.CounterOpts{
212                         Namespace: "arvados",
213                         Subsystem: "keepstore",
214                         Name:      "volume_io_out_bytes",
215                         Help:      "Number of output bytes",
216                 },
217                 []string{"label", "mount_point", "device_number"},
218         )
219         reg.MustRegister(m.OutBytes)
220         m.ErrorCodes = prometheus.NewCounterVec(
221                 prometheus.CounterOpts{
222                         Namespace: "arvados",
223                         Subsystem: "keepstore",
224                         Name:      "volume_io_error_codes",
225                         Help:      "Number of I/O errors by error code",
226                 },
227                 []string{"label", "mount_point", "device_number", "error_code"},
228         )
229         reg.MustRegister(m.ErrorCodes)
230
231         return m
232 }
233
234 func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics {
235         lbls := []string{lbl, mnt, dev}
236         curried := &volumeMetrics{
237                 BytesFree:  m.BytesFree.WithLabelValues(lbls...),
238                 BytesUsed:  m.BytesUsed.WithLabelValues(lbls...),
239                 Errors:     m.Errors.WithLabelValues(lbls...),
240                 Ops:        m.Ops.WithLabelValues(lbls...),
241                 CompareOps: m.CompareOps.WithLabelValues(lbls...),
242                 GetOps:     m.GetOps.WithLabelValues(lbls...),
243                 PutOps:     m.PutOps.WithLabelValues(lbls...),
244                 TouchOps:   m.TouchOps.WithLabelValues(lbls...),
245                 InBytes:    m.InBytes.WithLabelValues(lbls...),
246                 OutBytes:   m.OutBytes.WithLabelValues(lbls...),
247                 ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{
248                         "label":         lbl,
249                         "mount_point":   mnt,
250                         "device_number": dev,
251                 }),
252         }
253         return curried
254 }