13937: Adds facility for drivers to register their own counters.
[arvados.git] / services / keepstore / metrics.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "fmt"
9
10         "git.curoverse.com/arvados.git/sdk/go/httpserver"
11         "github.com/prometheus/client_golang/prometheus"
12 )
13
14 type nodeMetrics struct {
15         reg *prometheus.Registry
16 }
17
18 func (m *nodeMetrics) setupBufferPoolMetrics(b *bufferPool) {
19         m.reg.MustRegister(prometheus.NewGaugeFunc(
20                 prometheus.GaugeOpts{
21                         Namespace: "arvados",
22                         Subsystem: "keepstore",
23                         Name:      "bufferpool_bytes_allocated",
24                         Help:      "Number of bytes allocated to buffers",
25                 },
26                 func() float64 { return float64(b.Alloc()) },
27         ))
28         m.reg.MustRegister(prometheus.NewGaugeFunc(
29                 prometheus.GaugeOpts{
30                         Namespace: "arvados",
31                         Subsystem: "keepstore",
32                         Name:      "bufferpool_buffers_max",
33                         Help:      "Maximum number of buffers allowed",
34                 },
35                 func() float64 { return float64(b.Cap()) },
36         ))
37         m.reg.MustRegister(prometheus.NewGaugeFunc(
38                 prometheus.GaugeOpts{
39                         Namespace: "arvados",
40                         Subsystem: "keepstore",
41                         Name:      "bufferpool_buffers_in_use",
42                         Help:      "Number of buffers in use",
43                 },
44                 func() float64 { return float64(b.Len()) },
45         ))
46 }
47
48 func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
49         m.reg.MustRegister(prometheus.NewGaugeFunc(
50                 prometheus.GaugeOpts{
51                         Namespace: "arvados",
52                         Subsystem: "keepstore",
53                         Name:      fmt.Sprintf("%s_queue_in_progress", qName),
54                         Help:      fmt.Sprintf("Number of %s requests in progress", qName),
55                 },
56                 func() float64 { return float64(getWorkQueueStatus(q).InProgress) },
57         ))
58         m.reg.MustRegister(prometheus.NewGaugeFunc(
59                 prometheus.GaugeOpts{
60                         Namespace: "arvados",
61                         Subsystem: "keepstore",
62                         Name:      fmt.Sprintf("%s_queue_queued", qName),
63                         Help:      fmt.Sprintf("Number of queued %s requests", qName),
64                 },
65                 func() float64 { return float64(getWorkQueueStatus(q).Queued) },
66         ))
67 }
68
69 func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) {
70         m.reg.MustRegister(prometheus.NewGaugeFunc(
71                 prometheus.GaugeOpts{
72                         Namespace: "arvados",
73                         Subsystem: "keepstore",
74                         Name:      "requests_current",
75                         Help:      "Number of requests in progress",
76                 },
77                 func() float64 { return float64(rc.Current()) },
78         ))
79         m.reg.MustRegister(prometheus.NewGaugeFunc(
80                 prometheus.GaugeOpts{
81                         Namespace: "arvados",
82                         Subsystem: "keepstore",
83                         Name:      "requests_max",
84                         Help:      "Maximum number of concurrent requests",
85                 },
86                 func() float64 { return float64(rc.Max()) },
87         ))
88 }
89
90 type volumeMetricsVecs struct {
91         reg        *prometheus.Registry
92         BytesFree  *prometheus.GaugeVec
93         BytesUsed  *prometheus.GaugeVec
94         Errors     *prometheus.CounterVec
95         Ops        *prometheus.CounterVec
96         CompareOps *prometheus.CounterVec
97         GetOps     *prometheus.CounterVec
98         PutOps     *prometheus.CounterVec
99         TouchOps   *prometheus.CounterVec
100         InBytes    *prometheus.CounterVec
101         OutBytes   *prometheus.CounterVec
102         ErrorCodes *prometheus.CounterVec
103 }
104
105 type volumeMetrics struct {
106         reg              *prometheus.Registry
107         lbls             []string
108         internalCounters map[string]*prometheus.CounterVec
109         BytesFree        prometheus.Gauge
110         BytesUsed        prometheus.Gauge
111         Errors           prometheus.Counter
112         Ops              prometheus.Counter
113         CompareOps       prometheus.Counter
114         GetOps           prometheus.Counter
115         PutOps           prometheus.Counter
116         TouchOps         prometheus.Counter
117         InBytes          prometheus.Counter
118         OutBytes         prometheus.Counter
119         ErrorCodes       *prometheus.CounterVec
120 }
121
122 func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs {
123         m := &volumeMetricsVecs{
124                 reg: reg,
125         }
126         m.BytesFree = prometheus.NewGaugeVec(
127                 prometheus.GaugeOpts{
128                         Namespace: "arvados",
129                         Subsystem: "keepstore",
130                         Name:      "volume_bytes_free",
131                         Help:      "Number of free bytes on the volume",
132                 },
133                 []string{"label", "mount_point", "device_number"},
134         )
135         reg.MustRegister(m.BytesFree)
136         m.BytesUsed = prometheus.NewGaugeVec(
137                 prometheus.GaugeOpts{
138                         Namespace: "arvados",
139                         Subsystem: "keepstore",
140                         Name:      "volume_bytes_used",
141                         Help:      "Number of used bytes on the volume",
142                 },
143                 []string{"label", "mount_point", "device_number"},
144         )
145         reg.MustRegister(m.BytesUsed)
146         m.Errors = prometheus.NewCounterVec(
147                 prometheus.CounterOpts{
148                         Namespace: "arvados",
149                         Subsystem: "keepstore",
150                         Name:      "volume_io_errors",
151                         Help:      "Number of volume I/O errors",
152                 },
153                 []string{"label", "mount_point", "device_number"},
154         )
155         reg.MustRegister(m.Errors)
156         m.Ops = prometheus.NewCounterVec(
157                 prometheus.CounterOpts{
158                         Namespace: "arvados",
159                         Subsystem: "keepstore",
160                         Name:      "volume_io_ops",
161                         Help:      "Number of volume I/O operations",
162                 },
163                 []string{"label", "mount_point", "device_number"},
164         )
165         reg.MustRegister(m.Ops)
166         m.CompareOps = prometheus.NewCounterVec(
167                 prometheus.CounterOpts{
168                         Namespace: "arvados",
169                         Subsystem: "keepstore",
170                         Name:      "volume_io_compare_ops",
171                         Help:      "Number of volume I/O compare operations",
172                 },
173                 []string{"label", "mount_point", "device_number"},
174         )
175         reg.MustRegister(m.CompareOps)
176         m.GetOps = prometheus.NewCounterVec(
177                 prometheus.CounterOpts{
178                         Namespace: "arvados",
179                         Subsystem: "keepstore",
180                         Name:      "volume_io_get_ops",
181                         Help:      "Number of volume I/O get operations",
182                 },
183                 []string{"label", "mount_point", "device_number"},
184         )
185         reg.MustRegister(m.GetOps)
186         m.PutOps = prometheus.NewCounterVec(
187                 prometheus.CounterOpts{
188                         Namespace: "arvados",
189                         Subsystem: "keepstore",
190                         Name:      "volume_io_put_ops",
191                         Help:      "Number of volume I/O put operations",
192                 },
193                 []string{"label", "mount_point", "device_number"},
194         )
195         reg.MustRegister(m.PutOps)
196         m.TouchOps = prometheus.NewCounterVec(
197                 prometheus.CounterOpts{
198                         Namespace: "arvados",
199                         Subsystem: "keepstore",
200                         Name:      "volume_io_touch_ops",
201                         Help:      "Number of volume I/O touch operations",
202                 },
203                 []string{"label", "mount_point", "device_number"},
204         )
205         reg.MustRegister(m.TouchOps)
206         m.InBytes = prometheus.NewCounterVec(
207                 prometheus.CounterOpts{
208                         Namespace: "arvados",
209                         Subsystem: "keepstore",
210                         Name:      "volume_io_in_bytes",
211                         Help:      "Number of input bytes",
212                 },
213                 []string{"label", "mount_point", "device_number"},
214         )
215         reg.MustRegister(m.InBytes)
216         m.OutBytes = prometheus.NewCounterVec(
217                 prometheus.CounterOpts{
218                         Namespace: "arvados",
219                         Subsystem: "keepstore",
220                         Name:      "volume_io_out_bytes",
221                         Help:      "Number of output bytes",
222                 },
223                 []string{"label", "mount_point", "device_number"},
224         )
225         reg.MustRegister(m.OutBytes)
226         m.ErrorCodes = prometheus.NewCounterVec(
227                 prometheus.CounterOpts{
228                         Namespace: "arvados",
229                         Subsystem: "keepstore",
230                         Name:      "volume_io_error_codes",
231                         Help:      "Number of I/O errors by error code",
232                 },
233                 []string{"label", "mount_point", "device_number", "error_code"},
234         )
235         reg.MustRegister(m.ErrorCodes)
236
237         return m
238 }
239
240 func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics {
241         lbls := []string{lbl, mnt, dev}
242         curried := &volumeMetrics{
243                 reg:              m.reg,
244                 lbls:             lbls,
245                 internalCounters: make(map[string]*prometheus.CounterVec),
246                 BytesFree:        m.BytesFree.WithLabelValues(lbls...),
247                 BytesUsed:        m.BytesUsed.WithLabelValues(lbls...),
248                 Errors:           m.Errors.WithLabelValues(lbls...),
249                 Ops:              m.Ops.WithLabelValues(lbls...),
250                 CompareOps:       m.CompareOps.WithLabelValues(lbls...),
251                 GetOps:           m.GetOps.WithLabelValues(lbls...),
252                 PutOps:           m.PutOps.WithLabelValues(lbls...),
253                 TouchOps:         m.TouchOps.WithLabelValues(lbls...),
254                 InBytes:          m.InBytes.WithLabelValues(lbls...),
255                 OutBytes:         m.OutBytes.WithLabelValues(lbls...),
256                 ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{
257                         "label":         lbl,
258                         "mount_point":   mnt,
259                         "device_number": dev,
260                 }),
261         }
262         return curried
263 }
264
265 // Returns a driver specific counter, creating it when needed. The 'name' argument
266 // should include the driver prefix.
267 func (m *volumeMetrics) getInternalCounter(name string, help string) prometheus.Counter {
268         counterVec, ok := m.internalCounters[name]
269         if !ok {
270                 counterVec = prometheus.NewCounterVec(
271                         prometheus.CounterOpts{
272                                 Namespace: "arvados",
273                                 Subsystem: "keepstore",
274                                 Name:      name,
275                                 Help:      help,
276                         },
277                         []string{"label", "mount_point", "device_number"},
278                 )
279                 m.reg.MustRegister(counterVec)
280                 m.internalCounters[name] = counterVec
281         }
282         return counterVec.WithLabelValues(m.lbls...)
283 }