From c9877122f3474104399bd861ff46fa7455bb922a Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Thu, 7 Apr 2022 16:11:55 -0400
Subject: [PATCH] 18794: Add "arvados-server check" subcommand.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---
 cmd/arvados-server/cmd.go        |  2 +
 sdk/go/health/aggregator.go      | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 sdk/go/health/aggregator_test.go | 35 ++++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/cmd/arvados-server/cmd.go b/cmd/arvados-server/cmd.go
index 26c3f485ea..9e02d45b67 100644
--- a/cmd/arvados-server/cmd.go
+++ b/cmd/arvados-server/cmd.go
@@ -21,6 +21,7 @@ import (
 	"git.arvados.org/arvados.git/lib/install"
 	"git.arvados.org/arvados.git/lib/lsf"
 	"git.arvados.org/arvados.git/lib/recovercollection"
+	"git.arvados.org/arvados.git/sdk/go/health"
 	"git.arvados.org/arvados.git/services/keepproxy"
 	"git.arvados.org/arvados.git/services/keepstore"
 	"git.arvados.org/arvados.git/services/ws"
@@ -33,6 +34,7 @@ var (
 		"--version": cmd.Version,
 
 		"boot":                 boot.Command,
+		"check":                health.CheckCommand,
 		"cloudtest":            cloudtest.Command,
 		"config-check":         config.CheckCommand,
 		"config-defaults":      config.DumpDefaultsCommand,
diff --git a/sdk/go/health/aggregator.go b/sdk/go/health/aggregator.go
index 23d7e8d431..ba532dddca 100644
--- a/sdk/go/health/aggregator.go
+++ b/sdk/go/health/aggregator.go
@@ -8,19 +8,27 @@ import (
 	"context"
 	"crypto/tls"
 	"encoding/json"
+	"errors"
+	"flag"
 	"fmt"
+	"io"
 	"net/http"
 	"net/url"
 	"sync"
 	"time"
 
+	"git.arvados.org/arvados.git/lib/cmd"
+	"git.arvados.org/arvados.git/lib/config"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/ghodss/yaml"
+	"github.com/sirupsen/logrus"
 )
 
 const defaultTimeout = arvados.Duration(2 * time.Second)
 
-// Aggregator implements http.Handler. It handles "GET /_health/all"
+// Aggregator implements service.Handler. It handles "GET /_health/all"
 // by checking the health of all configured services on the cluster
 // and responding 200 if everything is healthy.
 type Aggregator struct {
@@ -241,3 +249,61 @@ func (agg *Aggregator) checkAuth(req *http.Request) bool {
 	}
 	return false
 }
+
+var errSilent = errors.New("")
+
+var CheckCommand cmd.Handler = checkCommand{}
+
+type checkCommand struct{}
+
+func (ccmd checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+	logger := ctxlog.New(stderr, "json", "info")
+	ctx := ctxlog.Context(context.Background(), logger)
+	err := ccmd.run(ctx, prog, args, stdin, stdout, stderr)
+	if err != nil {
+		if err != errSilent {
+			fmt.Fprintln(stdout, err.Error())
+		}
+		return 1
+	}
+	return 0
+}
+
+func (ccmd checkCommand) run(ctx context.Context, prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) error {
+	flags := flag.NewFlagSet("", flag.ContinueOnError)
+	flags.SetOutput(stderr)
+	loader := config.NewLoader(stdin, ctxlog.New(stderr, "text", "info"))
+	loader.SetupFlags(flags)
+	versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
+	timeout := flags.Duration("timeout", defaultTimeout.Duration(), "Maximum time to wait for health responses")
+	if ok, _ := cmd.ParseFlags(flags, prog, args, "", stderr); !ok {
+		// cmd.ParseFlags already reported the error
+		return errSilent
+	} else if *versionFlag {
+		cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
+		return nil
+	}
+	cfg, err := loader.Load()
+	if err != nil {
+		return err
+	}
+	cluster, err := cfg.GetCluster("")
+	if err != nil {
+		return err
+	}
+	logger := ctxlog.New(stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel).WithFields(logrus.Fields{
+		"ClusterID": cluster.ClusterID,
+	})
+	ctx = ctxlog.Context(ctx, logger)
+	agg := Aggregator{Cluster: cluster, timeout: arvados.Duration(*timeout)}
+	resp := agg.ClusterHealth()
+	buf, err := yaml.Marshal(resp)
+	if err != nil {
+		return err
+	}
+	stdout.Write(buf)
+	if resp.Health != "OK" {
+		return fmt.Errorf("health check failed")
+	}
+	return nil
+}
diff --git a/sdk/go/health/aggregator_test.go b/sdk/go/health/aggregator_test.go
index 04106caa44..05f0bdd31b 100644
--- a/sdk/go/health/aggregator_test.go
+++ b/sdk/go/health/aggregator_test.go
@@ -5,14 +5,19 @@
 package health
 
 import (
+	"bytes"
 	"encoding/json"
+	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
 	"strings"
 	"time"
 
+	"git.arvados.org/arvados.git/lib/config"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/ghodss/yaml"
 	"gopkg.in/check.v1"
 )
 
@@ -30,9 +35,17 @@ func (s *AggregatorSuite) TestInterface(c *check.C) {
 }
 
 func (s *AggregatorSuite) SetUpTest(c *check.C) {
-	s.handler = &Aggregator{Cluster: &arvados.Cluster{
-		ManagementToken: arvadostest.ManagementToken,
-	}}
+	ldr := config.NewLoader(bytes.NewBufferString(`Clusters: {zzzzz: {}}`), ctxlog.TestLogger(c))
+	ldr.Path = "-"
+	cfg, err := ldr.Load()
+	c.Assert(err, check.IsNil)
+	cluster, err := cfg.GetCluster("")
+	c.Assert(err, check.IsNil)
+	cluster.ManagementToken = arvadostest.ManagementToken
+	cluster.SystemRootToken = arvadostest.SystemRootToken
+	cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey
+	cluster.Volumes["z"] = arvados.Volume{StorageClasses: map[string]bool{"default": true}}
+	s.handler = &Aggregator{Cluster: cluster}
 	s.req = httptest.NewRequest("GET", "/_health/all", nil)
 	s.req.Header.Set("Authorization", "Bearer "+arvadostest.ManagementToken)
 	s.resp = httptest.NewRecorder()
@@ -122,6 +135,22 @@ func (s *AggregatorSuite) TestPingTimeout(c *check.C) {
 	c.Check(rt > 0.005, check.Equals, true)
 }
 
+func (s *AggregatorSuite) TestCheckCommand(c *check.C) {
+	srv, listen := s.stubServer(&healthyHandler{})
+	defer srv.Close()
+	s.setAllServiceURLs(listen)
+	tmpdir := c.MkDir()
+	confdata, err := yaml.Marshal(arvados.Config{Clusters: map[string]arvados.Cluster{s.handler.Cluster.ClusterID: *s.handler.Cluster}})
+	c.Assert(err, check.IsNil)
+	err = ioutil.WriteFile(tmpdir+"/config.yml", confdata, 0777)
+	c.Assert(err, check.IsNil)
+	var stdout, stderr bytes.Buffer
+	exitcode := CheckCommand.RunCommand("check", []string{"-config=" + tmpdir + "/config.yml"}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 0)
+	c.Check(stderr.String(), check.Equals, "")
+	c.Check(stdout.String(), check.Matches, `(?ms).*(\n|^)health: OK\n.*`)
+}
+
 func (s *AggregatorSuite) checkError(c *check.C) {
 	c.Check(s.resp.Code, check.Not(check.Equals), http.StatusOK)
 	var resp ClusterHealthResponse
-- 
2.30.2