Merge branch '19414-keep-balance-panic'
[arvados.git] / services / keep-balance / metrics.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "fmt"
9         "net/http"
10         "sync"
11
12         "github.com/prometheus/client_golang/prometheus"
13         "github.com/prometheus/client_golang/prometheus/promhttp"
14 )
15
// observer is anything that can record a single float64 sample.
// DurationObserver hands out prometheus summaries through this
// interface.
type observer interface {
	Observe(float64)
}
// setter is anything whose current value can be overwritten with a
// float64. UpdateStats stores its prometheus gauges through this
// interface.
type setter interface {
	Set(float64)
}
18
19 type metrics struct {
20         reg         *prometheus.Registry
21         statsGauges map[string]setter
22         observers   map[string]observer
23         setupOnce   sync.Once
24         mtx         sync.Mutex
25 }
26
27 func newMetrics(registry *prometheus.Registry) *metrics {
28         return &metrics{
29                 reg:         registry,
30                 statsGauges: map[string]setter{},
31                 observers:   map[string]observer{},
32         }
33 }
34
35 func (m *metrics) DurationObserver(name, help string) observer {
36         m.mtx.Lock()
37         defer m.mtx.Unlock()
38         if obs, ok := m.observers[name]; ok {
39                 return obs
40         }
41         summary := prometheus.NewSummary(prometheus.SummaryOpts{
42                 Namespace: "arvados",
43                 Name:      name,
44                 Subsystem: "keepbalance",
45                 Help:      help,
46         })
47         m.reg.MustRegister(summary)
48         m.observers[name] = summary
49         return summary
50 }
51
52 // UpdateStats updates prometheus metrics using the given
53 // balancerStats. It creates and registers the needed gauges on its
54 // first invocation.
55 func (m *metrics) UpdateStats(s balancerStats) {
56         type gauge struct {
57                 Value interface{}
58                 Help  string
59         }
60         s2g := map[string]gauge{
61                 "total":             {s.current, "current backend storage usage"},
62                 "garbage":           {s.garbage, "garbage (unreferenced, old)"},
63                 "transient":         {s.unref, "transient (unreferenced, new)"},
64                 "overreplicated":    {s.overrep, "overreplicated"},
65                 "underreplicated":   {s.underrep, "underreplicated"},
66                 "lost":              {s.lost, "lost"},
67                 "dedup_byte_ratio":  {s.dedupByteRatio(), "deduplication ratio, bytes referenced / bytes stored"},
68                 "dedup_block_ratio": {s.dedupBlockRatio(), "deduplication ratio, blocks referenced / blocks stored"},
69         }
70         m.setupOnce.Do(func() {
71                 // Register gauge(s) for each balancerStats field.
72                 addGauge := func(name, help string) {
73                         g := prometheus.NewGauge(prometheus.GaugeOpts{
74                                 Namespace: "arvados",
75                                 Name:      name,
76                                 Subsystem: "keep",
77                                 Help:      help,
78                         })
79                         m.reg.MustRegister(g)
80                         m.statsGauges[name] = g
81                 }
82                 for name, gauge := range s2g {
83                         switch gauge.Value.(type) {
84                         case blocksNBytes:
85                                 for _, sub := range []string{"blocks", "bytes", "replicas"} {
86                                         addGauge(name+"_"+sub, sub+" of "+gauge.Help)
87                                 }
88                         case int, int64, float64:
89                                 addGauge(name, gauge.Help)
90                         default:
91                                 panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
92                         }
93                 }
94         })
95         // Set gauges to values from s.
96         for name, gauge := range s2g {
97                 switch val := gauge.Value.(type) {
98                 case blocksNBytes:
99                         m.statsGauges[name+"_blocks"].Set(float64(val.blocks))
100                         m.statsGauges[name+"_bytes"].Set(float64(val.bytes))
101                         m.statsGauges[name+"_replicas"].Set(float64(val.replicas))
102                 case int:
103                         m.statsGauges[name].Set(float64(val))
104                 case int64:
105                         m.statsGauges[name].Set(float64(val))
106                 case float64:
107                         m.statsGauges[name].Set(float64(val))
108                 default:
109                         panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
110                 }
111         }
112 }
113
114 func (m *metrics) Handler(log promhttp.Logger) http.Handler {
115         return promhttp.HandlerFor(m.reg, promhttp.HandlerOpts{
116                 ErrorLog: log,
117         })
118 }