1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "github.com/prometheus/client_golang/prometheus"
14 "github.com/prometheus/client_golang/prometheus/promhttp"
// observer is the minimal interface needed to record one float64
// sample. It is the element type of the observers map and the return
// type of DurationObserver.
17 type observer interface{ Observe(float64) }
// setter is the minimal interface needed to assign a float64 value.
// It is the element type of the statsGauges map written by
// UpdateStats.
18 type setter interface{ Set(float64) }
// NOTE(review): these are fields of the metrics struct; the struct
// header and any remaining fields are not visible in this excerpt.

// reg is the registry that gauge vectors and summaries are registered
// with, and that Handler serves.
21 reg *prometheus.Registry
// statsGauges holds the scalar gauges set by UpdateStats, keyed by
// metric name.
22 statsGauges map[string]setter
// statsGaugeVecs holds the labelled gauge vectors set by UpdateStats,
// keyed by metric name.
23 statsGaugeVecs map[string]*prometheus.GaugeVec
// observers holds the summaries created by DurationObserver, keyed by
// the name passed to it.
24 observers map[string]observer
// newMetrics returns a *metrics whose statsGauges, statsGaugeVecs and
// observers maps are initialized empty (non-nil), so they can be
// written to without further setup.
//
// NOTE(review): the lines that return the struct and (presumably)
// store registry in the reg field are not visible in this excerpt.
29 func newMetrics(registry *prometheus.Registry) *metrics {
32 statsGauges: map[string]setter{},
33 statsGaugeVecs: map[string]*prometheus.GaugeVec{},
34 observers: map[string]observer{},
// DurationObserver looks up the observer stored under name. If there
// is none yet, it creates a prometheus Summary in the "keepbalance"
// subsystem, registers it with m.reg, and stores it in m.observers
// under name.
//
// NOTE(review): the return statements and the remaining SummaryOpts
// fields (presumably including help) are not visible in this excerpt.
38 func (m *metrics) DurationObserver(name, help string) observer {
// An observer already stored under this name is found here
// (presumably returned as-is; the branch body is not visible).
41 if obs, ok := m.observers[name]; ok {
44 summary := prometheus.NewSummary(prometheus.SummaryOpts{
47 Subsystem: "keepbalance",
// MustRegister panics if the collector cannot be registered.
50 m.reg.MustRegister(summary)
// Remember the summary so later calls with the same name find it.
51 m.observers[name] = summary
55 // UpdateStats updates prometheus metrics using the given
56 // balancerStats. It creates and registers the needed gauges on its
// first invocation (inside the setupOnce.Do call below); the gauges
// are then set to the values found in s.
//
// It panics with "bad gauge type" if an s2g entry holds a value of a
// type the type switches below do not handle.
//
// NOTE(review): this excerpt omits some lines of the function (for
// example several case labels and closing braces), so the comments
// below describe only the statements that are visible.
58 func (m *metrics) UpdateStats(s balancerStats) {
// s2g maps each metric name (or name stem) to the value taken from s
// and the help text used when the corresponding gauge is created.
63 s2g := map[string]gauge{
64 "total": {s.current, "current backend storage usage"},
65 "garbage": {s.garbage, "garbage (unreferenced, old)"},
66 "transient": {s.unref, "transient (unreferenced, new)"},
67 "overreplicated": {s.overrep, "overreplicated"},
68 "underreplicated": {s.underrep, "underreplicated"},
69 "unachievable": {s.unachievable, "unachievable"},
70 "balanced": {s.justright, "optimally balanced"},
71 "desired": {s.desired, "desired"},
72 "lost": {s.lost, "lost"},
73 "dedup_byte_ratio": {s.dedupByteRatio(), "deduplication ratio, bytes referenced / bytes stored"},
74 "dedup_block_ratio": {s.dedupBlockRatio(), "deduplication ratio, blocks referenced / blocks stored"},
75 "collection_bytes": {s.collectionBytes, "total apparent size of all collections"},
76 "referenced_bytes": {s.collectionBlockBytes, "total size of unique referenced blocks"},
77 "reference_count": {s.collectionBlockRefs, "block references in all collections"},
78 "referenced_blocks": {s.collectionBlocks, "blocks referenced by any collection"},
80 "pull_entries_sent_count": {s.pulls, "total entries sent in pull lists"},
81 "pull_entries_deferred_count": {s.pullsDeferred, "total entries deferred (not sent) in pull lists"},
82 "trash_entries_sent_count": {s.trashes, "total entries sent in trash lists"},
83 "trash_entries_deferred_count": {s.trashesDeferred, "total entries deferred (not sent) in trash lists"},
85 "replicated_block_count": {s.replHistogram, "blocks with indicated number of replicas at last count"},
86 "usage": {s.classStats, "stored in indicated storage class"},
// Setup: create a gauge or gauge vector for every s2g entry, chosen
// by the dynamic type of the entry's value.
// NOTE(review): setupOnce is presumably a sync.Once; its declaration
// is not visible in this excerpt.
88 m.setupOnce.Do(func() {
89 // Register gauge(s) for each balancerStats field.
// addGauge creates a scalar gauge and records it in m.statsGauges
// under name.
90 addGauge := func(name, help string) {
91 g := prometheus.NewGauge(prometheus.GaugeOpts{
98 m.statsGauges[name] = g
// The loop variable "gauge" shadows the gauge type used for s2g.
100 for name, gauge := range s2g {
101 switch gauge.Value.(type) {
// Three scalar gauges per entry: <name>_blocks, <name>_bytes and
// <name>_replicas, with help text "<sub> of <help>".
103 for _, sub := range []string{"blocks", "bytes", "replicas"} {
104 addGauge(name+"_"+sub, sub+" of "+gauge.Help)
// Plain numeric values get a single scalar gauge.
106 case int, int64, float64:
107 addGauge(name, gauge.Help)
// A gauge vector with a single "replicas" label.
110 gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
111 Namespace: "arvados",
115 }, []string{"replicas"})
116 m.reg.MustRegister(gv)
117 m.statsGaugeVecs[name] = gv
118 case map[string]replicationStats:
// Three gauge vectors per entry (<name>_blocks, <name>_bytes,
// <name>_replicas), each labelled by storage class and status.
120 for _, sub := range []string{"blocks", "bytes", "replicas"} {
// This "name" shadows the outer loop variable inside the inner loop.
121 name := name + "_" + sub
122 gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
123 Namespace: "arvados",
127 }, []string{"storage_class", "status"})
128 m.reg.MustRegister(gv)
129 m.statsGaugeVecs[name] = gv
// Any other value type in s2g is a programming error.
132 panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
136 // Set gauges to values from s.
137 for name, gauge := range s2g {
138 switch val := gauge.Value.(type) {
// Composite value: update the three per-component scalar gauges.
140 m.statsGauges[name+"_blocks"].Set(float64(val.blocks))
141 m.statsGauges[name+"_bytes"].Set(float64(val.bytes))
142 m.statsGauges[name+"_replicas"].Set(float64(val.replicas))
// Scalar values are converted to float64 (the case labels for these
// three assignments are not visible in this excerpt).
144 m.statsGauges[name].Set(float64(val))
146 m.statsGauges[name].Set(float64(val))
148 m.statsGauges[name].Set(float64(val))
// Each (r, n) pair in val sets the gauge labelled strconv.Itoa(r)
// to n.
151 for r, n := range val {
152 m.statsGaugeVecs[name].WithLabelValues(strconv.Itoa(r)).Set(float64(n))
154 // Record zero for higher-than-max-replication
155 // metrics, so we don't incorrectly continue
156 // to report stale metrics.
158 // For example, if we previously reported n=1
159 // for repl=6, but have since restarted
160 // keep-balance and the most replicated block
161 // now has repl=5, then the repl=6 gauge will
162 // still say n=1 until we clear it explicitly
// Zero the labels from len(val) up to, but not including, the larger
// of len(val)+4 and len(val)*2.
164 for r := len(val); r < len(val)+4 || r < len(val)*2; r++ {
165 m.statsGaugeVecs[name].WithLabelValues(strconv.Itoa(r)).Set(0)
167 case map[string]replicationStats:
// For each storage class, report the per-status totals under the
// (class, label) label pair of the three gauge vectors.
169 for class, cs := range val {
// The inner "val" shadows the switch variable within this loop.
170 for label, val := range map[string]blocksNBytes{
172 "unneeded": cs.unneeded,
173 "pulling": cs.pulling,
174 "unachievable": cs.unachievable,
176 m.statsGaugeVecs[name+"_blocks"].WithLabelValues(class, label).Set(float64(val.blocks))
177 m.statsGaugeVecs[name+"_bytes"].WithLabelValues(class, label).Set(float64(val.bytes))
178 m.statsGaugeVecs[name+"_replicas"].WithLabelValues(class, label).Set(float64(val.replicas))
// Any other value type in s2g is a programming error.
182 panic(fmt.Sprintf("bad gauge type %T", gauge.Value))
187 func (m *metrics) Handler(log promhttp.Logger) http.Handler {
188 return promhttp.HandlerFor(m.reg, promhttp.HandlerOpts{