X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e14975a4a482dd4a6c1579fbeee9038d7227b385..2f66d4cc05e9442a9bb69969744d0750a02a1ed4:/sdk/go/health/aggregator.go diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go index 334584b622..a0284e8f24 100644 --- a/sdk/go/health/aggregator.go +++ b/sdk/go/health/aggregator.go @@ -1,16 +1,21 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + package health import ( "context" "encoding/json" + "errors" "fmt" - "net" "net/http" + "net/url" "sync" "time" - "git.curoverse.com/arvados.git/sdk/go/arvados" - "git.curoverse.com/arvados.git/sdk/go/auth" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/auth" ) const defaultTimeout = arvados.Duration(2 * time.Second) @@ -23,7 +28,7 @@ type Aggregator struct { httpClient *http.Client timeout arvados.Duration - Config *arvados.Config + Cluster *arvados.Cluster // If non-nil, Log is called after handling each request. Log func(*http.Request, error) @@ -37,6 +42,10 @@ func (agg *Aggregator) setup() { } } +func (agg *Aggregator) CheckHealth() error { + return nil +} + func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) { agg.setupOnce.Do(agg.setup) sendErr := func(statusCode int, err error) { @@ -49,21 +58,18 @@ func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) { resp.Header().Set("Content-Type", "application/json") - cluster, err := agg.Config.GetCluster("") - if err != nil { - err = fmt.Errorf("arvados.GetCluster(): %s", err) - sendErr(http.StatusInternalServerError, err) - return - } - if !agg.checkAuth(req, cluster) { + if !agg.checkAuth(req) { sendErr(http.StatusUnauthorized, errUnauthorized) return } - if req.URL.Path != "/_health/all" { + if req.URL.Path == "/_health/all" { + json.NewEncoder(resp).Encode(agg.ClusterHealth()) + } else if req.URL.Path == "/_health/ping" { + resp.Write(healthyBody) + } else { sendErr(http.StatusNotFound, errNotFound) return } - json.NewEncoder(resp).Encode(agg.ClusterHealth(cluster)) if agg.Log != nil { agg.Log(req, nil) } @@ -82,7 +88,7 @@ type ClusterHealthResponse struct { // exposes problems that can't be expressed in Checks, like // "service S is needed, but isn't configured to run // anywhere." - Services map[string]ServiceHealth `json:"services"` + Services map[arvados.ServiceName]ServiceHealth `json:"services"` } type CheckResult struct { @@ -99,55 +105,51 @@ type ServiceHealth struct { N int `json:"n"` } -func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResponse { +func (agg *Aggregator) ClusterHealth() ClusterHealthResponse { + agg.setupOnce.Do(agg.setup) resp := ClusterHealthResponse{ Health: "OK", Checks: make(map[string]CheckResult), - Services: make(map[string]ServiceHealth), + Services: make(map[arvados.ServiceName]ServiceHealth), } mtx := sync.Mutex{} wg := sync.WaitGroup{} - for node, nodeConfig := range cluster.SystemNodes { - for svc, addr := range nodeConfig.ServicePorts() { - // Ensure svc is listed in resp.Services. - mtx.Lock() - if _, ok := resp.Services[svc]; !ok { - resp.Services[svc] = ServiceHealth{Health: "ERROR"} - } - mtx.Unlock() - - if addr == "" { - // svc is not expected on this node. - continue - } + for svcName, svc := range agg.Cluster.Services.Map() { + // Ensure svc is listed in resp.Services. + mtx.Lock() + if _, ok := resp.Services[svcName]; !ok { + resp.Services[svcName] = ServiceHealth{Health: "ERROR"} + } + mtx.Unlock() + for addr := range svc.InternalURLs { wg.Add(1) - go func(node, svc, addr string) { + go func(svcName arvados.ServiceName, addr arvados.URL) { defer wg.Done() var result CheckResult - url, err := agg.pingURL(node, addr) + pingURL, err := agg.pingURL(addr) if err != nil { result = CheckResult{ Health: "ERROR", Error: err.Error(), } } else { - result = agg.ping(url, cluster) + result = agg.ping(pingURL) } mtx.Lock() defer mtx.Unlock() - resp.Checks[svc+"+"+url] = result + resp.Checks[fmt.Sprintf("%s+%s", svcName, pingURL)] = result if result.Health == "OK" { - h := resp.Services[svc] + h := resp.Services[svcName] h.N++ h.Health = "OK" - resp.Services[svc] = h + resp.Services[svcName] = h } else { resp.Health = "ERROR" } - }(node, svc, addr) + }(svcName, addr) } } wg.Wait() @@ -163,12 +165,12 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp return resp } -func (agg *Aggregator) pingURL(node, addr string) (string, error) { - _, port, err := net.SplitHostPort(addr) - return "http://" + node + ":" + port + "/_health/ping", err +func (agg *Aggregator) pingURL(svcURL arvados.URL) (*url.URL, error) { + base := url.URL(svcURL) + return base.Parse("/_health/ping") } -func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckResult) { +func (agg *Aggregator) ping(target *url.URL) (result CheckResult) { t0 := time.Now() var err error @@ -181,11 +183,11 @@ func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckR } }() - req, err := http.NewRequest("GET", url, nil) + req, err := http.NewRequest("GET", target.String(), nil) if err != nil { return } - req.Header.Set("Authorization", "Bearer "+cluster.ManagementToken) + req.Header.Set("Authorization", "Bearer "+agg.Cluster.ManagementToken) ctx, cancel := context.WithTimeout(req.Context(), time.Duration(agg.timeout)) defer cancel() @@ -199,18 +201,22 @@ func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckR err = json.NewDecoder(resp.Body).Decode(&result.Response) if err != nil { err = fmt.Errorf("cannot decode response: %s", err) - return } else if resp.StatusCode != http.StatusOK { err = fmt.Errorf("HTTP %d %s", resp.StatusCode, resp.Status) - return + } else if h, _ := result.Response["health"].(string); h != "OK" { + if e, ok := result.Response["error"].(string); ok && e != "" { + err = errors.New(e) + } else { + err = fmt.Errorf("health=%q in ping response", h) + } } return } -func (agg *Aggregator) checkAuth(req *http.Request, cluster *arvados.Cluster) bool { - creds := auth.NewCredentialsFromHTTPRequest(req) +func (agg *Aggregator) checkAuth(req *http.Request) bool { + creds := auth.CredentialsFromRequest(req) for _, token := range creds.Tokens { - if token != "" && token == cluster.ManagementToken { + if token != "" && token == agg.Cluster.ManagementToken { return true } }