X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ae03057c03ca6b8543c93a4b3938ba06dad41571..8a27fe370239ecb8e50d53f46b45ed61203a35ca:/services/keep-balance/main.go diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go index 92ed644b59..f0b0df5bd3 100644 --- a/services/keep-balance/main.go +++ b/services/keep-balance/main.go @@ -2,99 +2,117 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepbalance import ( + "bytes" "context" "flag" "fmt" "io" - "log" - "os" - - "git.curoverse.com/arvados.git/lib/config" - "git.curoverse.com/arvados.git/lib/service" - "git.curoverse.com/arvados.git/sdk/go/arvados" - "git.curoverse.com/arvados.git/sdk/go/ctxlog" - "github.com/sirupsen/logrus" -) - -var ( - version = "dev" - debugf = func(string, ...interface{}) {} - command = service.Command(arvados.ServiceNameKeepbalance, newHandler) - options RunOptions + _ "net/http/pprof" + + "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/lib/config" + "git.arvados.org/arvados.git/lib/service" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" + "git.arvados.org/arvados.git/sdk/go/health" + "github.com/jmoiron/sqlx" + _ "github.com/lib/pq" + "github.com/prometheus/client_golang/prometheus" ) -func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string) service.Handler { - if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) { - return service.ErrorHandler(ctx, cluster, fmt.Errorf("You must either run keep-balance with the -once flag, or set Collections.BalancePeriod in the config. "+ - "If using the legacy keep-balance.yml config, RunPeriod is the equivalant of Collections.BalancePeriod.")) - } - - ac, err := arvados.NewClientFromConfig(cluster) - if err != nil { - return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err)) - } +type command struct{} - if cluster.SystemLogs.LogLevel == "debug" { - debugf = log.Printf - } +var Command = command{} - if options.Logger == nil { - options.Logger = ctxlog.FromContext(ctx) - } - - srv := &Server{ - Cluster: cluster, - ArvClient: ac, - RunOptions: options, - Metrics: newMetrics(), - Logger: options.Logger, - Dumper: options.Dumper, - } - - srv.Start() - return srv -} - -func main() { - os.Exit(runCommand(os.Args[0], os.Args[1:], os.Stdin, os.Stdout, os.Stderr)) -} - -func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { - logger := ctxlog.FromContext(context.Background()) - - flags := flag.NewFlagSet(prog, flag.ExitOnError) +func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + var options RunOptions + flags := flag.NewFlagSet(prog, flag.ContinueOnError) flags.BoolVar(&options.Once, "once", false, "balance once and then exit") flags.BoolVar(&options.CommitPulls, "commit-pulls", false, "send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)") flags.BoolVar(&options.CommitTrash, "commit-trash", false, "send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)") + flags.BoolVar(&options.CommitConfirmedFields, "commit-confirmed-fields", true, + "update collection fields (replicas_confirmed, storage_classes_confirmed, etc.)") + // These options are implemented by service.Command, so we + // don't need the vars here -- we just need the flags + // to pass flags.Parse(). + flags.Bool("dump", false, "dump details for each block to stdout") + flags.String("pprof", "", "serve Go profile data at `[addr]:port`") flags.Bool("version", false, "Write version information to stdout and exit 0") - dumpFlag := flags.Bool("dump", false, "dump details for each block to stdout") - loader := config.NewLoader(os.Stdin, logger) + logger := ctxlog.New(stderr, "json", "info") + loader := config.NewLoader(&bytes.Buffer{}, logger) loader.SetupFlags(flags) - munged := loader.MungeLegacyConfigArgs(logger, args, "-legacy-keepbalance-config") - flags.Parse(munged) - - if *dumpFlag { - dumper := logrus.New() - dumper.Out = os.Stdout - dumper.Formatter = &logrus.TextFormatter{} - options.Dumper = dumper + if ok, code := cmd.ParseFlags(flags, prog, munged, "", stderr); !ok { + return code } - // Only pass along the version flag, which gets handled in RunCommand + // Drop our custom args that would be rejected by the generic + // service.Command args = nil + dropFlag := map[string]bool{ + "once": true, + "commit-pulls": true, + "commit-trash": true, + "commit-confirmed-fields": true, + "dump": true, + } flags.Visit(func(f *flag.Flag) { - if f.Name == "version" { - args = append(args, "-"+f.Name, f.Value.String()) + if !dropFlag[f.Name] { + args = append(args, "-"+f.Name+"="+f.Value.String()) } }) - return command.RunCommand(prog, args, stdin, stdout, stderr) + return service.Command(arvados.ServiceNameKeepbalance, + func(ctx context.Context, cluster *arvados.Cluster, token string, registry *prometheus.Registry) service.Handler { + if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) { + return service.ErrorHandler(ctx, cluster, fmt.Errorf("cannot start service: Collections.BalancePeriod is zero (if you want to run once and then exit, use the -once flag)")) + } + + ac, err := arvados.NewClientFromConfig(cluster) + ac.AuthToken = token + if err != nil { + return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err)) + } + + db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String()) + if err != nil { + return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection failed: %s", err)) + } + if p := cluster.PostgreSQL.ConnectionPool; p > 0 { + db.SetMaxOpenConns(p) + } + err = db.Ping() + if err != nil { + return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection succeeded but ping failed: %s", err)) + } + + if options.Logger == nil { + options.Logger = ctxlog.FromContext(ctx) + } + + srv := &Server{ + Cluster: cluster, + ArvClient: ac, + RunOptions: options, + Metrics: newMetrics(registry), + Logger: options.Logger, + Dumper: options.Dumper, + DB: db, + } + srv.Handler = &health.Handler{ + Token: cluster.ManagementToken, + Prefix: "/_health/", + Routes: health.Routes{"ping": srv.CheckHealth}, + } + + go srv.run() + return srv + }).RunCommand(prog, args, stdin, stdout, stderr) }