16217: Remove old status/debug endpoints, test /metrics instead.
authorTom Clegg <tom@tomclegg.ca>
Wed, 25 Mar 2020 20:54:59 +0000 (16:54 -0400)
committerTom Clegg <tom@tomclegg.ca>
Wed, 25 Mar 2020 20:54:59 +0000 (16:54 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

services/ws/event_source.go
services/ws/event_source_test.go
services/ws/router.go
services/ws/service_test.go
services/ws/session_v0_test.go

index 60d4d40aca67ee3b504a8dba6c71def29caa1ba5..3593c3aebd58ceae6932e9667eca43aba8a8c0cf 100644 (file)
@@ -132,16 +132,21 @@ func (ps *pgEventSource) setup() {
                Help:      "Open connections to the database",
        }, []string{"inuse"})
        ps.Reg.MustRegister(openConnections)
+
+       updateDBStats := func() {
+               stats := ps.db.Stats()
+               maxConnections.Set(float64(stats.MaxOpenConnections))
+               openConnections.WithLabelValues("0").Set(float64(stats.Idle))
+               openConnections.WithLabelValues("1").Set(float64(stats.InUse))
+       }
        go func() {
                <-ps.ready
                if ps.db == nil {
                        return
                }
+               updateDBStats()
                for range time.Tick(time.Second) {
-                       stats := ps.db.Stats()
-                       maxConnections.Set(float64(stats.MaxOpenConnections))
-                       openConnections.WithLabelValues("0").Set(float64(stats.Idle))
-                       openConnections.WithLabelValues("1").Set(float64(stats.InUse))
+                       updateDBStats()
                }
        }()
 }
@@ -192,7 +197,7 @@ func (ps *pgEventSource) Run() {
                return
        }
        if ps.MaxOpenConns <= 0 {
-               ps.Logger.Warn("no database connection limit configured -- consider setting PostgresPool>0 in arvados-ws configuration file")
+               ps.Logger.Warn("no database connection limit configured -- consider setting PostgreSQL.ConnectionPool>0 in arvados-ws configuration file")
        }
        db.SetMaxOpenConns(ps.MaxOpenConns)
        if err = db.Ping(); err != nil {
index 9af4c2bf9d7e2a6abc5f16e011b801ff8f479554..b7b8ac3006f3fa6af19de31737af82129dbf8642 100644 (file)
@@ -16,8 +16,6 @@ import (
        check "gopkg.in/check.v1"
 )
 
-var _ debugStatuser = (*pgEventSource)(nil)
-
 var _ = check.Suite(&eventSourceSuite{})
 
 type eventSourceSuite struct{}
index 5f40143fcde0ddc2e33ac027259c3ccdb67ff9f0..878c282f8a6c57f17192b777faba760485757b86 100644 (file)
@@ -5,15 +5,12 @@
 package ws
 
 import (
-       "encoding/json"
        "io"
        "net/http"
-       "strconv"
        "sync"
        "sync/atomic"
        "time"
 
-       "git.arvados.org/arvados.git/lib/cmd"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/health"
@@ -40,20 +37,6 @@ type router struct {
        setupOnce sync.Once
        done      chan struct{}
        reg       *prometheus.Registry
-
-       lastReqID  int64
-       lastReqMtx sync.Mutex
-
-       status routerDebugStatus
-}
-
-type routerDebugStatus struct {
-       ReqsReceived int64
-       ReqsActive   int64
-}
-
-type debugStatuser interface {
-       DebugStatus() interface{}
 }
 
 func (rtr *router) setup() {
@@ -70,9 +53,6 @@ func (rtr *router) setup() {
                QueueSize:   rtr.cluster.API.WebsocketClientEventQueue,
        }
        rtr.mux = http.NewServeMux()
-       rtr.mux.Handle("/debug.json", rtr.jsonHandler(rtr.DebugStatus))
-       rtr.mux.Handle("/status.json", rtr.jsonHandler(rtr.Status))
-
        rtr.mux.Handle("/websocket", rtr.makeServer(newSessionV0, mSockets.WithLabelValues("0")))
        rtr.mux.Handle("/arvados/v1/events.ws", rtr.makeServer(newSessionV1, mSockets.WithLabelValues("1")))
        rtr.mux.Handle("/_health/", &health.Handler{
@@ -98,7 +78,6 @@ func (rtr *router) makeServer(newSession sessionFactory, gauge prometheus.Gauge)
                Handler: websocket.Handler(func(ws *websocket.Conn) {
                        t0 := time.Now()
                        logger := ctxlog.FromContext(ws.Request().Context())
-                       logger.Info("connected")
                        atomic.AddInt64(&connected, 1)
                        gauge.Set(float64(atomic.LoadInt64(&connected)))
 
@@ -110,7 +89,7 @@ func (rtr *router) makeServer(newSession sessionFactory, gauge prometheus.Gauge)
                        logger.WithFields(logrus.Fields{
                                "elapsed": time.Now().Sub(t0).Seconds(),
                                "stats":   stats,
-                       }).Info("disconnect")
+                       }).Info("client disconnected")
                        ws.Close()
                        atomic.AddInt64(&connected, -1)
                        gauge.Set(float64(atomic.LoadInt64(&connected)))
@@ -118,65 +97,11 @@ func (rtr *router) makeServer(newSession sessionFactory, gauge prometheus.Gauge)
        }
 }
 
-func (rtr *router) newReqID() string {
-       rtr.lastReqMtx.Lock()
-       defer rtr.lastReqMtx.Unlock()
-       id := time.Now().UnixNano()
-       if id <= rtr.lastReqID {
-               id = rtr.lastReqID + 1
-       }
-       return strconv.FormatInt(id, 36)
-}
-
-func (rtr *router) DebugStatus() interface{} {
-       s := map[string]interface{}{
-               "HTTP":     rtr.status,
-               "Outgoing": rtr.handler.DebugStatus(),
-       }
-       if es, ok := rtr.eventSource.(debugStatuser); ok {
-               s["EventSource"] = es.DebugStatus()
-       }
-       return s
-}
-
-func (rtr *router) Status() interface{} {
-       return map[string]interface{}{
-               "Clients": atomic.LoadInt64(&rtr.status.ReqsActive),
-               "Version": cmd.Version.String(),
-       }
-}
-
 func (rtr *router) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        rtr.setupOnce.Do(rtr.setup)
-       atomic.AddInt64(&rtr.status.ReqsReceived, 1)
-       atomic.AddInt64(&rtr.status.ReqsActive, 1)
-       defer atomic.AddInt64(&rtr.status.ReqsActive, -1)
-
-       logger := ctxlog.FromContext(req.Context()).
-               WithField("RequestID", rtr.newReqID())
-       ctx := ctxlog.Context(req.Context(), logger)
-       req = req.WithContext(ctx)
-       logger.WithFields(logrus.Fields{
-               "remoteAddr":      req.RemoteAddr,
-               "reqForwardedFor": req.Header.Get("X-Forwarded-For"),
-       }).Info("accept request")
        rtr.mux.ServeHTTP(resp, req)
 }
 
-func (rtr *router) jsonHandler(fn func() interface{}) http.Handler {
-       return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-               logger := ctxlog.FromContext(r.Context())
-               w.Header().Set("Content-Type", "application/json")
-               enc := json.NewEncoder(w)
-               err := enc.Encode(fn())
-               if err != nil {
-                       msg := "encode failed"
-                       logger.WithError(err).Error(msg)
-                       http.Error(w, msg, http.StatusInternalServerError)
-               }
-       })
-}
-
 func (rtr *router) CheckHealth() error {
        rtr.setupOnce.Do(rtr.setup)
        return rtr.eventSource.DBHealth()
index 1afd8e006496a88ec30f7edd9f990d9f03e0809d..7213dcad2a9ddbb967991d70a2f9b094ce317b98 100644 (file)
@@ -7,12 +7,12 @@ package ws
 import (
        "bytes"
        "context"
-       "encoding/json"
        "flag"
        "io/ioutil"
        "net/http"
        "net/http/httptest"
        "os"
+       "strings"
        "sync"
        "time"
 
@@ -21,6 +21,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
@@ -30,6 +31,7 @@ var _ = check.Suite(&serviceSuite{})
 
 type serviceSuite struct {
        handler service.Handler
+       reg     *prometheus.Registry
        srv     *httptest.Server
        cluster *arvados.Cluster
        wg      sync.WaitGroup
@@ -41,9 +43,11 @@ func (s *serviceSuite) SetUpTest(c *check.C) {
        c.Assert(err, check.IsNil)
 }
 
-func (s *serviceSuite) start() {
-       s.handler = newHandler(context.Background(), s.cluster, "", prometheus.NewRegistry())
-       s.srv = httptest.NewServer(s.handler)
+func (s *serviceSuite) start(c *check.C) {
+       s.reg = prometheus.NewRegistry()
+       s.handler = newHandler(context.Background(), s.cluster, "", s.reg)
+       instrumented := httpserver.Instrument(s.reg, ctxlog.TestLogger(c), s.handler)
+       s.srv = httptest.NewServer(instrumented.ServeAPI(s.cluster.ManagementToken, instrumented))
 }
 
 func (s *serviceSuite) TearDownTest(c *check.C) {
@@ -67,6 +71,7 @@ func (*serviceSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
        cluster.SystemRootToken = client.AuthToken
        cluster.TLS.Insecure = client.Insecure
        cluster.PostgreSQL.Connection = testDBConfig()
+       cluster.PostgreSQL.ConnectionPool = 12
        cluster.Services.Websocket.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: ":"}: arvados.ServiceInstance{}}
        cluster.ManagementToken = arvadostest.ManagementToken
        return cluster, nil
@@ -77,7 +82,7 @@ func (*serviceSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
 // startup.
 func (s *serviceSuite) TestBadDB(c *check.C) {
        s.cluster.PostgreSQL.Connection["password"] = "1234"
-       s.start()
+       s.start(c)
        resp, err := http.Get(s.srv.URL)
        c.Check(err, check.IsNil)
        c.Check(resp.StatusCode, check.Equals, http.StatusInternalServerError)
@@ -87,7 +92,7 @@ func (s *serviceSuite) TestBadDB(c *check.C) {
 }
 
 func (s *serviceSuite) TestHealth(c *check.C) {
-       s.start()
+       s.start(c)
        for _, token := range []string{"", "foo", s.cluster.ManagementToken} {
                req, err := http.NewRequest("GET", s.srv.URL+"/_health/ping", nil)
                c.Assert(err, check.IsNil)
@@ -107,22 +112,36 @@ func (s *serviceSuite) TestHealth(c *check.C) {
        }
 }
 
-func (s *serviceSuite) TestStatus(c *check.C) {
-       s.start()
-       req, err := http.NewRequest("GET", s.srv.URL+"/status.json", nil)
-       c.Assert(err, check.IsNil)
-       resp, err := http.DefaultClient.Do(req)
-       c.Check(err, check.IsNil)
-       c.Check(resp.StatusCode, check.Equals, http.StatusOK)
-       var status map[string]interface{}
-       err = json.NewDecoder(resp.Body).Decode(&status)
-       c.Check(err, check.IsNil)
-       c.Check(status["Version"], check.Not(check.Equals), "")
+func (s *serviceSuite) TestMetrics(c *check.C) {
+       s.start(c)
+       s.handler.CheckHealth()
+       for deadline := time.Now().Add(time.Second); ; {
+               req, err := http.NewRequest("GET", s.srv.URL+"/metrics", nil)
+               c.Assert(err, check.IsNil)
+               req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
+               resp, err := http.DefaultClient.Do(req)
+               c.Check(err, check.IsNil)
+               c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+               text, err := ioutil.ReadAll(resp.Body)
+               c.Check(err, check.IsNil)
+               if strings.Contains(string(text), "_db_max_connections 0\n") {
+                       // wait for the first db stats update
+                       if time.Now().After(deadline) {
+                               c.Fatal("timed out")
+                       }
+                       time.Sleep(time.Second / 50)
+                       continue
+               }
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_max_connections 12\n.*`)
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_open_connections\{inuse="0"\} \d+\n.*`)
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_open_connections\{inuse="1"\} \d+\n.*`)
+               break
+       }
 }
 
 func (s *serviceSuite) TestHealthDisabled(c *check.C) {
        s.cluster.ManagementToken = ""
-       s.start()
+       s.start(c)
        for _, token := range []string{"", "foo", arvadostest.ManagementToken} {
                req, err := http.NewRequest("GET", s.srv.URL+"/_health/ping", nil)
                c.Assert(err, check.IsNil)
index 45baaa334bc002786da4848d8dafcaa74a84adce..7986cc7b08f95598ae4756be0aa1ca3dea2e2f7b 100644 (file)
@@ -40,7 +40,7 @@ type v0Suite struct {
 
 func (s *v0Suite) SetUpTest(c *check.C) {
        s.serviceSuite.SetUpTest(c)
-       s.serviceSuite.start()
+       s.serviceSuite.start(c)
 
        s.token = arvadostest.ActiveToken
        s.ignoreLogID = s.lastLogID(c)