From: Tom Clegg Date: Mon, 26 Jun 2017 14:10:26 +0000 (-0400) Subject: 11901: Add /_health/ping and /_health/db health checks. X-Git-Tag: 1.1.0~170^2~4 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/7800f12fccea5675d71159ddf7c868f4074f8f56 11901: Add /_health/ping and /_health/db health checks. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/services/ws/event.go b/services/ws/event.go index 304f86bbd0..fd280aebb9 100644 --- a/services/ws/event.go +++ b/services/ws/event.go @@ -17,6 +17,7 @@ type eventSink interface { type eventSource interface { NewSink() eventSink DB() *sql.DB + DBHealth() error } type event struct { diff --git a/services/ws/event_source.go b/services/ws/event_source.go index 7c1b58492d..6a308b3a62 100644 --- a/services/ws/event_source.go +++ b/services/ws/event_source.go @@ -242,6 +242,12 @@ func (ps *pgEventSource) DB() *sql.DB { return ps.db } +func (ps *pgEventSource) DBHealth() error { + ctx, _ := context.WithDeadline(context.Background(), time.Now().Add(time.Second)) + var i int + return ps.db.QueryRowContext(ctx, "SELECT 1").Scan(&i) +} + func (ps *pgEventSource) DebugStatus() interface{} { ps.mtx.Lock() defer ps.mtx.Unlock() diff --git a/services/ws/event_source_test.go b/services/ws/event_source_test.go index b157cfa0eb..94e3ba3ea0 100644 --- a/services/ws/event_source_test.go +++ b/services/ws/event_source_test.go @@ -105,4 +105,6 @@ func (*eventSourceSuite) TestEventSource(c *check.C) { case <-time.After(10 * time.Second): c.Fatal("timed out") } + + c.Check(pges.DBHealth(), check.IsNil) } diff --git a/services/ws/router.go b/services/ws/router.go index 15b825f2ab..b2c94e7109 100644 --- a/services/ws/router.go +++ b/services/ws/router.go @@ -55,6 +55,8 @@ func (rtr *router) setup() { rtr.mux.Handle("/arvados/v1/events.ws", rtr.makeServer(newSessionV1)) rtr.mux.HandleFunc("/debug.json", jsonHandler(rtr.DebugStatus)) rtr.mux.HandleFunc("/status.json", jsonHandler(rtr.Status)) + rtr.mux.HandleFunc("/_health/ping", jsonHandler(rtr.HealthFunc(func() error { return nil }))) + rtr.mux.HandleFunc("/_health/db", jsonHandler(rtr.HealthFunc(rtr.eventSource.DBHealth))) } func (rtr *router) makeServer(newSession sessionFactory) *websocket.Server { @@ -102,6 +104,21 @@ func (rtr *router) DebugStatus() interface{} { return s } +var pingResponseOK = map[string]string{"health": "OK"} + +func (rtr *router) HealthFunc(f func() error) func() interface{} { + return func() interface{} { + err := f() + if err == nil { + return pingResponseOK + } + return map[string]string{ + "health": "ERROR", + "error": err.Error(), + } + } +} + func (rtr *router) Status() interface{} { return map[string]interface{}{ "Clients": atomic.LoadInt64(&rtr.status.ReqsActive), diff --git a/services/ws/server_test.go b/services/ws/server_test.go index d74f7dff42..57c734af2d 100644 --- a/services/ws/server_test.go +++ b/services/ws/server_test.go @@ -1,6 +1,8 @@ package main import ( + "io/ioutil" + "net/http" "sync" "time" @@ -11,9 +13,17 @@ import ( var _ = check.Suite(&serverSuite{}) type serverSuite struct { + cfg *wsConfig + srv *server + wg sync.WaitGroup } -func testConfig() *wsConfig { +func (s *serverSuite) SetUpTest(c *check.C) { + s.cfg = s.testConfig() + s.srv = &server{wsConfig: s.cfg} +} + +func (*serverSuite) testConfig() *wsConfig { cfg := defaultConfig() cfg.Client = *(arvados.NewClientFromEnv()) cfg.Postgres = testDBConfig() @@ -24,20 +34,18 @@ func testConfig() *wsConfig { // TestBadDB ensures Run() returns an error (instead of panicking or // deadlocking) if it can't connect to the database server at startup. func (s *serverSuite) TestBadDB(c *check.C) { - cfg := testConfig() - cfg.Postgres["password"] = "1234" - srv := &server{wsConfig: cfg} + s.cfg.Postgres["password"] = "1234" var wg sync.WaitGroup wg.Add(1) go func() { - err := srv.Run() + err := s.srv.Run() c.Check(err, check.NotNil) wg.Done() }() wg.Add(1) go func() { - srv.WaitReady() + s.srv.WaitReady() wg.Done() }() @@ -53,9 +61,12 @@ func (s *serverSuite) TestBadDB(c *check.C) { } } -func newTestServer() *server { - srv := &server{wsConfig: testConfig()} - go srv.Run() - srv.WaitReady() - return srv +func (s *serverSuite) TestHealth(c *check.C) { + go s.srv.Run() + s.srv.WaitReady() + resp, err := http.Get("http://" + s.srv.listener.Addr().String() + "/_health/ping") + c.Check(err, check.IsNil) + buf, err := ioutil.ReadAll(resp.Body) + c.Check(err, check.IsNil) + c.Check(string(buf), check.Equals, `{"health":"OK"}`+"\n") } diff --git a/services/ws/session_v0_test.go b/services/ws/session_v0_test.go index 85e36560e8..f6fe3f60e6 100644 --- a/services/ws/session_v0_test.go +++ b/services/ws/session_v0_test.go @@ -25,11 +25,13 @@ func init() { var _ = check.Suite(&v0Suite{}) type v0Suite struct { - token string - toDelete []string + serverSuite serverSuite + token string + toDelete []string } func (s *v0Suite) SetUpTest(c *check.C) { + s.serverSuite.SetUpTest(c) s.token = arvadostest.ActiveToken } @@ -227,7 +229,9 @@ func (s *v0Suite) expectLog(c *check.C, r *json.Decoder) *arvados.Log { } func (s *v0Suite) testClient() (*server, *websocket.Conn, *json.Decoder, *json.Encoder) { - srv := newTestServer() + go s.serverSuite.srv.Run() + s.serverSuite.srv.WaitReady() + srv := s.serverSuite.srv conn, err := websocket.Dial("ws://"+srv.listener.Addr().String()+"/websocket?api_token="+s.token, "", "http://"+srv.listener.Addr().String()) if err != nil { panic(err)