18700: Add workbench2 to arvados-boot.
[arvados.git] / sdk / go / health / aggregator.go
index a6cb8798aa328a468c1db98c3c3e5bf38773f15c..296ef65dd1f6b52b30a313aebce20716d1a3d4dc 100644 (file)
@@ -6,16 +6,16 @@ package health
 
 import (
        "context"
+       "crypto/tls"
        "encoding/json"
-       "errors"
        "fmt"
-       "net"
        "net/http"
+       "net/url"
        "sync"
        "time"
 
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/auth"
 )
 
 const defaultTimeout = arvados.Duration(2 * time.Second)
@@ -28,20 +28,34 @@ type Aggregator struct {
        httpClient *http.Client
        timeout    arvados.Duration
 
-       Config *arvados.Config
+       Cluster *arvados.Cluster
 
        // If non-nil, Log is called after handling each request.
        Log func(*http.Request, error)
 }
 
 func (agg *Aggregator) setup() {
-       agg.httpClient = http.DefaultClient
+       agg.httpClient = &http.Client{
+               Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{
+                               InsecureSkipVerify: agg.Cluster.TLS.Insecure,
+                       },
+               },
+       }
        if agg.timeout == 0 {
                // this is always the case, except in the test suite
                agg.timeout = defaultTimeout
        }
 }
 
+func (agg *Aggregator) CheckHealth() error {
+       return nil
+}
+
+func (agg *Aggregator) Done() <-chan struct{} {
+       return nil
+}
+
 func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        agg.setupOnce.Do(agg.setup)
        sendErr := func(statusCode int, err error) {
@@ -54,21 +68,18 @@ func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 
        resp.Header().Set("Content-Type", "application/json")
 
-       cluster, err := agg.Config.GetCluster("")
-       if err != nil {
-               err = fmt.Errorf("arvados.GetCluster(): %s", err)
-               sendErr(http.StatusInternalServerError, err)
-               return
-       }
-       if !agg.checkAuth(req, cluster) {
+       if !agg.checkAuth(req) {
                sendErr(http.StatusUnauthorized, errUnauthorized)
                return
        }
-       if req.URL.Path != "/_health/all" {
+       if req.URL.Path == "/_health/all" {
+               json.NewEncoder(resp).Encode(agg.ClusterHealth())
+       } else if req.URL.Path == "/_health/ping" {
+               resp.Write(healthyBody)
+       } else {
                sendErr(http.StatusNotFound, errNotFound)
                return
        }
-       json.NewEncoder(resp).Encode(agg.ClusterHealth(cluster))
        if agg.Log != nil {
                agg.Log(req, nil)
        }
@@ -104,7 +115,8 @@ type ServiceHealth struct {
        N      int    `json:"n"`
 }
 
-func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResponse {
+func (agg *Aggregator) ClusterHealth() ClusterHealthResponse {
+       agg.setupOnce.Do(agg.setup)
        resp := ClusterHealthResponse{
                Health:   "OK",
                Checks:   make(map[string]CheckResult),
@@ -113,46 +125,41 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp
 
        mtx := sync.Mutex{}
        wg := sync.WaitGroup{}
-       for profileName, profile := range cluster.NodeProfiles {
-               for svc, addr := range profile.ServicePorts() {
-                       // Ensure svc is listed in resp.Services.
-                       mtx.Lock()
-                       if _, ok := resp.Services[svc]; !ok {
-                               resp.Services[svc] = ServiceHealth{Health: "ERROR"}
-                       }
-                       mtx.Unlock()
-
-                       if addr == "" {
-                               // svc is not expected on this node.
-                               continue
-                       }
+       for svcName, svc := range agg.Cluster.Services.Map() {
+               // Ensure svc is listed in resp.Services.
+               mtx.Lock()
+               if _, ok := resp.Services[svcName]; !ok {
+                       resp.Services[svcName] = ServiceHealth{Health: "ERROR"}
+               }
+               mtx.Unlock()
 
+               for addr := range svc.InternalURLs {
                        wg.Add(1)
-                       go func(profileName string, svc arvados.ServiceName, addr string) {
+                       go func(svcName arvados.ServiceName, addr arvados.URL) {
                                defer wg.Done()
                                var result CheckResult
-                               url, err := agg.pingURL(profileName, addr)
+                               pingURL, err := agg.pingURL(addr)
                                if err != nil {
                                        result = CheckResult{
                                                Health: "ERROR",
                                                Error:  err.Error(),
                                        }
                                } else {
-                                       result = agg.ping(url, cluster)
+                                       result = agg.ping(pingURL)
                                }
 
                                mtx.Lock()
                                defer mtx.Unlock()
-                               resp.Checks[fmt.Sprintf("%s+%s", svc, url)] = result
+                               resp.Checks[fmt.Sprintf("%s+%s", svcName, pingURL)] = result
                                if result.Health == "OK" {
-                                       h := resp.Services[svc]
+                                       h := resp.Services[svcName]
                                        h.N++
                                        h.Health = "OK"
-                                       resp.Services[svc] = h
+                                       resp.Services[svcName] = h
                                } else {
                                        resp.Health = "ERROR"
                                }
-                       }(profileName, svc, addr)
+                       }(svcName, addr)
                }
        }
        wg.Wait()
@@ -168,58 +175,60 @@ func (agg *Aggregator) ClusterHealth(cluster *arvados.Cluster) ClusterHealthResp
        return resp
 }
 
-func (agg *Aggregator) pingURL(node, addr string) (string, error) {
-       _, port, err := net.SplitHostPort(addr)
-       return "http://" + node + ":" + port + "/_health/ping", err
+func (agg *Aggregator) pingURL(svcURL arvados.URL) (*url.URL, error) {
+       base := url.URL(svcURL)
+       return base.Parse("/_health/ping")
 }
 
-func (agg *Aggregator) ping(url string, cluster *arvados.Cluster) (result CheckResult) {
+func (agg *Aggregator) ping(target *url.URL) (result CheckResult) {
        t0 := time.Now()
-
-       var err error
        defer func() {
                result.ResponseTime = json.Number(fmt.Sprintf("%.6f", time.Since(t0).Seconds()))
-               if err != nil {
-                       result.Health, result.Error = "ERROR", err.Error()
-               } else {
-                       result.Health = "OK"
-               }
        }()
+       result.Health = "ERROR"
 
-       req, err := http.NewRequest("GET", url, nil)
+       req, err := http.NewRequest("GET", target.String(), nil)
        if err != nil {
+               result.Error = err.Error()
                return
        }
-       req.Header.Set("Authorization", "Bearer "+cluster.ManagementToken)
+       req.Header.Set("Authorization", "Bearer "+agg.Cluster.ManagementToken)
+
+       // Avoid workbench1's redirect-http-to-https feature
+       req.Header.Set("X-Forwarded-Proto", "https")
 
        ctx, cancel := context.WithTimeout(req.Context(), time.Duration(agg.timeout))
        defer cancel()
        req = req.WithContext(ctx)
        resp, err := agg.httpClient.Do(req)
        if err != nil {
+               result.Error = err.Error()
                return
        }
        result.HTTPStatusCode = resp.StatusCode
        result.HTTPStatusText = resp.Status
        err = json.NewDecoder(resp.Body).Decode(&result.Response)
        if err != nil {
-               err = fmt.Errorf("cannot decode response: %s", err)
+               result.Error = fmt.Sprintf("cannot decode response: %s", err)
        } else if resp.StatusCode != http.StatusOK {
-               err = fmt.Errorf("HTTP %d %s", resp.StatusCode, resp.Status)
+               result.Error = fmt.Sprintf("HTTP %d %s", resp.StatusCode, resp.Status)
        } else if h, _ := result.Response["health"].(string); h != "OK" {
                if e, ok := result.Response["error"].(string); ok && e != "" {
-                       err = errors.New(e)
+                       result.Error = e
+                       return
                } else {
-                       err = fmt.Errorf("health=%q in ping response", h)
+                       result.Error = fmt.Sprintf("health=%q in ping response", h)
+                       return
                }
        }
+       result.Health = "OK"
        return
 }
 
-func (agg *Aggregator) checkAuth(req *http.Request, cluster *arvados.Cluster) bool {
-       creds := auth.NewCredentialsFromHTTPRequest(req)
+func (agg *Aggregator) checkAuth(req *http.Request) bool {
+       creds := auth.CredentialsFromRequest(req)
        for _, token := range creds.Tokens {
-               if token != "" && token == cluster.ManagementToken {
+               if token != "" && token == agg.Cluster.ManagementToken {
                        return true
                }
        }