X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fe6606652dfe274627eadcef902d6e30d2856440..57792708500261a817e6957e65c80c7f798a36e9:/sdk/go/health/aggregator_test.go diff --git a/sdk/go/health/aggregator_test.go b/sdk/go/health/aggregator_test.go index 5d76c19f29..f76f7b8ea8 100644 --- a/sdk/go/health/aggregator_test.go +++ b/sdk/go/health/aggregator_test.go @@ -13,9 +13,11 @@ import ( "net/http" "net/http/httptest" "regexp" + "runtime" "strings" "time" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadostest" @@ -48,6 +50,7 @@ func (s *AggregatorSuite) SetUpTest(c *check.C) { cluster.SystemRootToken = arvadostest.SystemRootToken cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey cluster.Volumes["z"] = arvados.Volume{StorageClasses: map[string]bool{"default": true}} + cluster.Containers.LocalKeepBlobBuffersPerVCPU = 0 s.handler = &Aggregator{Cluster: cluster} s.req = httptest.NewRequest("GET", "/_health/all", nil) s.req.Header.Set("Authorization", "Bearer "+arvadostest.ManagementToken) @@ -123,6 +126,44 @@ func (s *AggregatorSuite) TestHealthyAndUnhealthy(c *check.C) { c.Logf("%#v", ep) } +// If an InternalURL host is 0.0.0.0, localhost, 127/8, or ::1 and +// nothing is listening there, don't fail the health check -- instead, +// assume the relevant component just isn't installed/enabled on this +// node, but does work when contacted through ExternalURL. +func (s *AggregatorSuite) TestUnreachableLoopbackPort(c *check.C) { + srvH, listenH := s.stubServer(&healthyHandler{}) + defer srvH.Close() + s.setAllServiceURLs(listenH) + arvadostest.SetServiceURL(&s.handler.Cluster.Services.Keepproxy, "http://localhost:9/") + arvadostest.SetServiceURL(&s.handler.Cluster.Services.Workbench1, "http://0.0.0.0:9/") + arvadostest.SetServiceURL(&s.handler.Cluster.Services.Keepbalance, "http://127.0.0.127:9/") + arvadostest.SetServiceURL(&s.handler.Cluster.Services.WebDAV, "http://[::1]:9/") + s.handler.ServeHTTP(s.resp, s.req) + s.checkOK(c) + + // If a non-loopback address is unreachable, that's still a + // fail. + s.resp = httptest.NewRecorder() + arvadostest.SetServiceURL(&s.handler.Cluster.Services.WebDAV, "http://172.31.255.254:9/") + s.handler.ServeHTTP(s.resp, s.req) + s.checkUnhealthy(c) +} + +func (s *AggregatorSuite) TestIsLocalHost(c *check.C) { + c.Check(isLocalHost("Localhost"), check.Equals, true) + c.Check(isLocalHost("localhost"), check.Equals, true) + c.Check(isLocalHost("127.0.0.1"), check.Equals, true) + c.Check(isLocalHost("127.0.0.127"), check.Equals, true) + c.Check(isLocalHost("127.1.2.7"), check.Equals, true) + c.Check(isLocalHost("0.0.0.0"), check.Equals, true) + c.Check(isLocalHost("::1"), check.Equals, true) + c.Check(isLocalHost("1.2.3.4"), check.Equals, false) + c.Check(isLocalHost("1::1"), check.Equals, false) + c.Check(isLocalHost("example.com"), check.Equals, false) + c.Check(isLocalHost("127.0.0"), check.Equals, false) + c.Check(isLocalHost(""), check.Equals, false) +} + func (s *AggregatorSuite) TestConfigMismatch(c *check.C) { // time1/hash1: current config time1 := time.Now().Add(time.Second - time.Minute - time.Hour) @@ -182,6 +223,74 @@ func (s *AggregatorSuite) TestConfigMismatch(c *check.C) { s.checkOK(c) } +func (s *AggregatorSuite) TestClockSkew(c *check.C) { + // srv1: report real wall clock time + handler1 := healthyHandler{} + srv1, listen1 := s.stubServer(&handler1) + defer srv1.Close() + // srv2: report near-future time + handler2 := healthyHandler{headerDate: time.Now().Add(3 * time.Second)} + srv2, listen2 := s.stubServer(&handler2) + defer srv2.Close() + // srv3: report far-future time + handler3 := healthyHandler{headerDate: time.Now().Add(3*time.Minute + 3*time.Second)} + srv3, listen3 := s.stubServer(&handler3) + defer srv3.Close() + + s.setAllServiceURLs(listen1) + + // near-future time => OK + s.resp = httptest.NewRecorder() + arvadostest.SetServiceURL(&s.handler.Cluster.Services.DispatchCloud, + "http://localhost"+listen2+"/") + s.handler.ServeHTTP(s.resp, s.req) + s.checkOK(c) + + // far-future time => error + s.resp = httptest.NewRecorder() + arvadostest.SetServiceURL(&s.handler.Cluster.Services.WebDAV, + "http://localhost"+listen3+"/") + s.handler.ServeHTTP(s.resp, s.req) + resp := s.checkUnhealthy(c) + if c.Check(len(resp.Errors) > 0, check.Equals, true) { + c.Check(resp.Errors[0], check.Matches, `clock skew detected: maximum timestamp spread is 3m.* \(exceeds warning threshold of 1m\)`) + } +} + +func (s *AggregatorSuite) TestVersionSkew(c *check.C) { + // srv1: report same version + handler1 := healthyHandler{version: cmd.Version.String()} + srv1, listen1 := s.stubServer(&handler1) + defer srv1.Close() + // srv2: report same version but without " (go1.2.3)" part + handler2 := healthyHandler{version: strings.Fields(cmd.Version.String())[0]} + srv2, listen2 := s.stubServer(&handler2) + defer srv2.Close() + // srv3: report different version + handler3 := healthyHandler{version: "1.2.3~4 (" + runtime.Version() + ")"} + srv3, listen3 := s.stubServer(&handler3) + defer srv3.Close() + + s.setAllServiceURLs(listen1) + + // same version but without go1.2.3 part => OK + s.resp = httptest.NewRecorder() + arvadostest.SetServiceURL(&s.handler.Cluster.Services.RailsAPI, + "http://localhost"+listen2+"/") + s.handler.ServeHTTP(s.resp, s.req) + s.checkOK(c) + + // different version => error + s.resp = httptest.NewRecorder() + arvadostest.SetServiceURL(&s.handler.Cluster.Services.WebDAV, + "http://localhost"+listen3+"/") + s.handler.ServeHTTP(s.resp, s.req) + resp := s.checkUnhealthy(c) + if c.Check(len(resp.Errors) > 0, check.Equals, true) { + c.Check(resp.Errors[0], check.Matches, `version mismatch: \Qkeep-web+http://localhost`+listen3+`\E is running 1.2.3~4 (.*) -- expected \Q`+cmd.Version.String()+`\E`) + } +} + func (s *AggregatorSuite) TestPingTimeout(c *check.C) { s.handler.timeout = arvados.Duration(100 * time.Millisecond) srv, listen := s.stubServer(&slowHandler{}) @@ -207,11 +316,27 @@ func (s *AggregatorSuite) TestCheckCommand(c *check.C) { confdata = regexp.MustCompile(`Source(Timestamp|SHA256): [^\n]+\n`).ReplaceAll(confdata, []byte{}) err = ioutil.WriteFile(tmpdir+"/config.yml", confdata, 0777) c.Assert(err, check.IsNil) + var stdout, stderr bytes.Buffer + exitcode := CheckCommand.RunCommand("check", []string{"-config=" + tmpdir + "/config.yml"}, &bytes.Buffer{}, &stdout, &stderr) c.Check(exitcode, check.Equals, 0) + c.Check(stderr.String(), check.Equals, "health check OK\n") + c.Check(stdout.String(), check.Equals, "") + + stdout.Reset() + stderr.Reset() + exitcode = CheckCommand.RunCommand("check", []string{"-quiet", "-config=" + tmpdir + "/config.yml"}, &bytes.Buffer{}, &stdout, &stderr) + c.Check(exitcode, check.Equals, 0) c.Check(stderr.String(), check.Equals, "") - c.Check(stdout.String(), check.Matches, `(?ms).*(\n|^)health: OK\n.*`) + c.Check(stdout.String(), check.Equals, "") + + stdout.Reset() + stderr.Reset() + exitcode = CheckCommand.RunCommand("check", []string{"-config=" + tmpdir + "/config.yml", "-yaml"}, &bytes.Buffer{}, &stdout, &stderr) + c.Check(exitcode, check.Equals, 0) + c.Check(stderr.String(), check.Equals, "") + c.Check(stdout.String(), check.Matches, `(?ms).*(\n|^)Health: OK\n.*`) } func (s *AggregatorSuite) checkError(c *check.C) { @@ -246,6 +371,8 @@ func (s *AggregatorSuite) setAllServiceURLs(listen string) { &svcs.Controller, &svcs.DispatchCloud, &svcs.DispatchLSF, + &svcs.DispatchSLURM, + &svcs.GitHTTP, &svcs.Keepbalance, &svcs.Keepproxy, &svcs.Keepstore, @@ -271,11 +398,16 @@ func (*unhealthyHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) } type healthyHandler struct { + version string configHash string configTime time.Time + headerDate time.Time } func (h *healthyHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) { + if !h.headerDate.IsZero() { + resp.Header().Set("Date", h.headerDate.Format(time.RFC1123)) + } authOK := req.Header.Get("Authorization") == "Bearer "+arvadostest.ManagementToken if req.URL.Path == "/_health/ping" { if !authOK { @@ -298,9 +430,13 @@ arvados_config_load_timestamp_seconds{sha256="%s"} %g # HELP arvados_config_source_timestamp_seconds Timestamp of config file when it was loaded. # TYPE arvados_config_source_timestamp_seconds gauge arvados_config_source_timestamp_seconds{sha256="%s"} %g +# HELP arvados_version_running Indicated version is running. +# TYPE arvados_version_running gauge +arvados_version_running{version="%s"} 1 `, h.configHash, float64(time.Now().UnixNano())/1e9, - h.configHash, float64(t.UnixNano())/1e9) + h.configHash, float64(t.UnixNano())/1e9, + h.version) } else { http.Error(resp, "not found", http.StatusNotFound) }