X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e14975a4a482dd4a6c1579fbeee9038d7227b385..89be4b30feccc3680ca77339711b29367754dc05:/sdk/go/health/aggregator.go diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go index 334584b622..564331327a 100644 --- a/sdk/go/health/aggregator.go +++ b/sdk/go/health/aggregator.go @@ -1,8 +1,13 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + package health import ( "context" "encoding/json" + "errors" "fmt" "net" "net/http" @@ -82,7 +87,7 @@ type ClusterHealthResponse struct { // exposes problems that can't be expressed in Checks, like // "service S is needed, but isn't configured to run // anywhere." - Services map[string]ServiceHealth `json:"services"` + Services map[arvados.ServiceName]ServiceHealth `json:"services"` } type CheckResult struct { @@ -103,13 +108,13 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp resp := ClusterHealthResponse{ Health: "OK", Checks: make(map[string]CheckResult), - Services: make(map[string]ServiceHealth), + Services: make(map[arvados.ServiceName]ServiceHealth), } mtx := sync.Mutex{} wg := sync.WaitGroup{} - for node, nodeConfig := range cluster.SystemNodes { - for svc, addr := range nodeConfig.ServicePorts() { + for profileName, profile := range cluster.NodeProfiles { + for svc, addr := range profile.ServicePorts() { // Ensure svc is listed in resp.Services. mtx.Lock() if _, ok := resp.Services[svc]; !ok { @@ -123,10 +128,10 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp } wg.Add(1) - go func(node, svc, addr string) { + go func(profileName string, svc arvados.ServiceName, addr string) { defer wg.Done() var result CheckResult - url, err := agg.pingURL(node, addr) + url, err := agg.pingURL(profileName, addr) if err != nil { result = CheckResult{ Health: "ERROR", @@ -138,7 +143,7 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp mtx.Lock() defer mtx.Unlock() - resp.Checks[svc+"+"+url] = result + resp.Checks[fmt.Sprintf("%s+%s", svc, url)] = result if result.Health == "OK" { h := resp.Services[svc] h.N++ @@ -147,7 +152,7 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp } else { resp.Health = "ERROR" } - }(node, svc, addr) + }(profileName, svc, addr) } } wg.Wait() @@ -199,16 +204,20 @@ func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckR err = json.NewDecoder(resp.Body).Decode(&result.Response) if err != nil { err = fmt.Errorf("cannot decode response: %s", err) - return } else if resp.StatusCode != http.StatusOK { err = fmt.Errorf("HTTP %d %s", resp.StatusCode, resp.Status) - return + } else if h, _ := result.Response["health"].(string); h != "OK" { + if e, ok := result.Response["error"].(string); ok && e != "" { + err = errors.New(e) + } else { + err = fmt.Errorf("health=%q in ping response", h) + } } return } func (agg *Aggregator) checkAuth(req *http.Request, cluster *arvados.Cluster) bool { - creds := auth.NewCredentialsFromHTTPRequest(req) + creds := auth.CredentialsFromRequest(req) for _, token := range creds.Tokens { if token != "" && token == cluster.ManagementToken { return true