From f50aff88ccf1ce6e590a3fe98689eabef4ad292a Mon Sep 17 00:00:00 2001 From: Lucas Di Pentima Date: Wed, 13 Feb 2019 19:58:45 -0300 Subject: [PATCH 1/1] 13937: Adds facility for drivers to register their own counters. Used on unix_volume. All the others are pending. Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima --- services/keepstore/metrics.go | 73 ++++++++++++++++++++--------- services/keepstore/pipe_adapters.go | 2 +- services/keepstore/stats_ticker.go | 16 +++++-- services/keepstore/volume_unix.go | 39 ++++++++++++++- 4 files changed, 102 insertions(+), 28 deletions(-) diff --git a/services/keepstore/metrics.go b/services/keepstore/metrics.go index 63e406c930..80194eb824 100644 --- a/services/keepstore/metrics.go +++ b/services/keepstore/metrics.go @@ -88,6 +88,7 @@ func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) { } type volumeMetricsVecs struct { + reg *prometheus.Registry BytesFree *prometheus.GaugeVec BytesUsed *prometheus.GaugeVec Errors *prometheus.CounterVec @@ -102,21 +103,26 @@ type volumeMetricsVecs struct { } type volumeMetrics struct { - BytesFree prometheus.Gauge - BytesUsed prometheus.Gauge - Errors prometheus.Counter - Ops prometheus.Counter - CompareOps prometheus.Counter - GetOps prometheus.Counter - PutOps prometheus.Counter - TouchOps prometheus.Counter - InBytes prometheus.Counter - OutBytes prometheus.Counter - ErrorCodes *prometheus.CounterVec + reg *prometheus.Registry + lbls []string + internalCounters map[string]*prometheus.CounterVec + BytesFree prometheus.Gauge + BytesUsed prometheus.Gauge + Errors prometheus.Counter + Ops prometheus.Counter + CompareOps prometheus.Counter + GetOps prometheus.Counter + PutOps prometheus.Counter + TouchOps prometheus.Counter + InBytes prometheus.Counter + OutBytes prometheus.Counter + ErrorCodes *prometheus.CounterVec } func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs { - m := &volumeMetricsVecs{} + m := &volumeMetricsVecs{ + reg: reg, + } m.BytesFree = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: "arvados", @@ -234,16 +240,19 @@ func newVolumeMetricsVecs(reg *prometheus.Registry) *volumeMetricsVecs { func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volumeMetrics { lbls := []string{lbl, mnt, dev} curried := &volumeMetrics{ - BytesFree: m.BytesFree.WithLabelValues(lbls...), - BytesUsed: m.BytesUsed.WithLabelValues(lbls...), - Errors: m.Errors.WithLabelValues(lbls...), - Ops: m.Ops.WithLabelValues(lbls...), - CompareOps: m.CompareOps.WithLabelValues(lbls...), - GetOps: m.GetOps.WithLabelValues(lbls...), - PutOps: m.PutOps.WithLabelValues(lbls...), - TouchOps: m.TouchOps.WithLabelValues(lbls...), - InBytes: m.InBytes.WithLabelValues(lbls...), - OutBytes: m.OutBytes.WithLabelValues(lbls...), + reg: m.reg, + lbls: lbls, + internalCounters: make(map[string]*prometheus.CounterVec), + BytesFree: m.BytesFree.WithLabelValues(lbls...), + BytesUsed: m.BytesUsed.WithLabelValues(lbls...), + Errors: m.Errors.WithLabelValues(lbls...), + Ops: m.Ops.WithLabelValues(lbls...), + CompareOps: m.CompareOps.WithLabelValues(lbls...), + GetOps: m.GetOps.WithLabelValues(lbls...), + PutOps: m.PutOps.WithLabelValues(lbls...), + TouchOps: m.TouchOps.WithLabelValues(lbls...), + InBytes: m.InBytes.WithLabelValues(lbls...), + OutBytes: m.OutBytes.WithLabelValues(lbls...), ErrorCodes: m.ErrorCodes.MustCurryWith(prometheus.Labels{ "label": lbl, "mount_point": mnt, @@ -252,3 +261,23 @@ func (m *volumeMetricsVecs) curryWith(lbl string, mnt string, dev string) *volum } return curried } + +// Returns a driver specific counter, creating it when needed. The 'name' argument +// should include the driver prefix. +func (m *volumeMetrics) getInternalCounter(name string, help string) prometheus.Counter { + counterVec, ok := m.internalCounters[name] + if !ok { + counterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "arvados", + Subsystem: "keepstore", + Name: name, + Help: help, + }, + []string{"label", "mount_point", "device_number"}, + ) + m.reg.MustRegister(counterVec) + m.internalCounters[name] = counterVec + } + return counterVec.WithLabelValues(m.lbls...) +} diff --git a/services/keepstore/pipe_adapters.go b/services/keepstore/pipe_adapters.go index e4a5865a43..69ed6d2ff5 100644 --- a/services/keepstore/pipe_adapters.go +++ b/services/keepstore/pipe_adapters.go @@ -39,7 +39,7 @@ func getWithPipe(ctx context.Context, loc string, buf []byte, br BlockReader) (i } } -// putWithPipe invokes putter with a new pipe, and and copies data +// putWithPipe invokes putter with a new pipe, and copies data // from buf into the pipe. If ctx is done before all data is copied, // putWithPipe closes the pipe with an error, and returns early with // an error. diff --git a/services/keepstore/stats_ticker.go b/services/keepstore/stats_ticker.go index 7f52b744d5..a9f24744b3 100644 --- a/services/keepstore/stats_ticker.go +++ b/services/keepstore/stats_ticker.go @@ -48,7 +48,9 @@ func (s *statsTicker) TickErr(err error, errType string) { if err == nil { return } - s.errors.Inc() + if s.errors != nil { + s.errors.Inc() + } s.Tick(&s.Errors) s.lock.Lock() @@ -57,17 +59,23 @@ func (s *statsTicker) TickErr(err error, errType string) { } s.ErrorCodes[errType]++ s.lock.Unlock() - s.errCounters.WithLabelValues(errType).Inc() + if s.errCounters != nil { + s.errCounters.WithLabelValues(errType).Inc() + } } // TickInBytes increments the incoming byte counter by n. func (s *statsTicker) TickInBytes(n uint64) { - s.inBytes.Add(float64(n)) + if s.inBytes != nil { + s.inBytes.Add(float64(n)) + } atomic.AddUint64(&s.InBytes, n) } // TickOutBytes increments the outgoing byte counter by n. func (s *statsTicker) TickOutBytes(n uint64) { - s.outBytes.Add(float64(n)) + if s.outBytes != nil { + s.outBytes.Add(float64(n)) + } atomic.AddUint64(&s.OutBytes, n) } diff --git a/services/keepstore/volume_unix.go b/services/keepstore/volume_unix.go index 5313059eaa..8d61f96197 100644 --- a/services/keepstore/volume_unix.go +++ b/services/keepstore/volume_unix.go @@ -21,6 +21,8 @@ import ( "sync/atomic" "syscall" "time" + + "github.com/prometheus/client_golang/prometheus" ) type unixVolumeAdder struct { @@ -234,7 +236,7 @@ func (v *UnixVolume) Start(m *volumeMetrics) error { if err == nil { // Set up prometheus metrics v.metrics = m - v.os.stats.statsTicker.setup(m) + v.os.stats.setup(v.metrics) // Periodically update free/used volume space go func() { for { @@ -269,6 +271,7 @@ func (v *UnixVolume) Touch(loc string) error { } defer v.unlockfile(f) ts := syscall.NsecToTimespec(time.Now().UnixNano()) + v.os.stats.utimesOps.Inc() v.os.stats.Tick(&v.os.stats.UtimesOps) err = syscall.UtimesNano(p, []syscall.Timespec{ts, ts}) v.os.stats.TickErr(err) @@ -462,6 +465,7 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error { return err } defer rootdir.Close() + v.os.stats.readdirOps.Inc() v.os.stats.Tick(&v.os.stats.ReaddirOps) for { names, err := rootdir.Readdirnames(1) @@ -484,6 +488,7 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error { lastErr = err continue } + v.os.stats.readdirOps.Inc() v.os.stats.Tick(&v.os.stats.ReaddirOps) for { fileInfo, err := blockdir.Readdir(1) @@ -572,6 +577,7 @@ func (v *UnixVolume) Untrash(loc string) (err error) { return MethodDisabledError } + v.os.stats.readdirOps.Inc() v.os.stats.Tick(&v.os.stats.ReaddirOps) files, err := ioutil.ReadDir(v.blockDir(loc)) if err != nil { @@ -718,6 +724,7 @@ func (v *UnixVolume) unlock() { // lockfile and unlockfile use flock(2) to manage kernel file locks. func (v *UnixVolume) lockfile(f *os.File) error { + v.os.stats.flockOps.Inc() v.os.stats.Tick(&v.os.stats.FlockOps) err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX) v.os.stats.TickErr(err) @@ -822,6 +829,27 @@ type unixStats struct { RenameOps uint64 UnlinkOps uint64 ReaddirOps uint64 + // Prometheus metrics -- Above ad-hoc counters will be eventually removed + openOps prometheus.Counter + statOps prometheus.Counter + flockOps prometheus.Counter + utimesOps prometheus.Counter + createOps prometheus.Counter + renameOps prometheus.Counter + unlinkOps prometheus.Counter + readdirOps prometheus.Counter +} + +func (s *unixStats) setup(m *volumeMetrics) { + s.statsTicker.setup(m) + s.openOps = m.getInternalCounter("unix_open_ops", "Number of backend open operations") + s.statOps = m.getInternalCounter("unix_stat_ops", "Number of backend stat operations") + s.flockOps = m.getInternalCounter("unix_flock_ops", "Number of backend flock operations") + s.utimesOps = m.getInternalCounter("unix_utimes_ops", "Number of backend utimes operations") + s.createOps = m.getInternalCounter("unix_create_ops", "Number of backend create operations") + s.renameOps = m.getInternalCounter("unix_rename_ops", "Number of backend rename operations") + s.unlinkOps = m.getInternalCounter("unix_unlink_ops", "Number of backend unlink operations") + s.readdirOps = m.getInternalCounter("unix_readdir_ops", "Number of backend readdir operations") } func (s *unixStats) TickErr(err error) { @@ -836,6 +864,7 @@ type osWithStats struct { } func (o *osWithStats) Open(name string) (*os.File, error) { + o.stats.openOps.Inc() o.stats.Tick(&o.stats.OpenOps) f, err := os.Open(name) o.stats.TickErr(err) @@ -843,6 +872,7 @@ func (o *osWithStats) Open(name string) (*os.File, error) { } func (o *osWithStats) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) { + o.stats.openOps.Inc() o.stats.Tick(&o.stats.OpenOps) f, err := os.OpenFile(name, flag, perm) o.stats.TickErr(err) @@ -850,6 +880,7 @@ func (o *osWithStats) OpenFile(name string, flag int, perm os.FileMode) (*os.Fil } func (o *osWithStats) Remove(path string) error { + o.stats.unlinkOps.Inc() o.stats.Tick(&o.stats.UnlinkOps) err := os.Remove(path) o.stats.TickErr(err) @@ -857,6 +888,7 @@ func (o *osWithStats) Remove(path string) error { } func (o *osWithStats) Rename(a, b string) error { + o.stats.renameOps.Inc() o.stats.Tick(&o.stats.RenameOps) err := os.Rename(a, b) o.stats.TickErr(err) @@ -864,6 +896,10 @@ func (o *osWithStats) Rename(a, b string) error { } func (o *osWithStats) Stat(path string) (os.FileInfo, error) { + // Avoid segfaulting when called from vol.Status() on theConfig.Start() + if o.stats.statOps != nil { + o.stats.statOps.Inc() + } o.stats.Tick(&o.stats.StatOps) fi, err := os.Stat(path) o.stats.TickErr(err) @@ -871,6 +907,7 @@ func (o *osWithStats) Stat(path string) (os.FileInfo, error) { } func (o *osWithStats) TempFile(dir, base string) (*os.File, error) { + o.stats.createOps.Inc() o.stats.Tick(&o.stats.CreateOps) f, err := ioutil.TempFile(dir, base) o.stats.TickErr(err) -- 2.30.2