- }
- if *dumpFlag {
- runOptions.Dumper = log.New(os.Stdout, "", log.LstdFlags)
- }
- err := CheckConfig(cfg, runOptions)
- if err != nil {
- // (don't run)
- } else if runOptions.Once {
- _, err = (&Balancer{}).Run(cfg, runOptions)
- } else {
- err = RunForever(cfg, runOptions, nil)
- }
- if err != nil {
- log.Fatal(err)
- }
-}
-
-func mustReadConfig(dst interface{}, path string) {
- if err := config.LoadFile(dst, path); err != nil {
- log.Fatal(err)
- }
-}
-
-// RunForever runs forever, or (for testing purposes) until the given
-// stop channel is ready to receive.
-func RunForever(config Config, runOptions RunOptions, stop <-chan interface{}) error {
- if runOptions.Logger == nil {
- runOptions.Logger = log.New(os.Stderr, "", log.LstdFlags)
- }
- logger := runOptions.Logger
-
- ticker := time.NewTicker(time.Duration(config.RunPeriod))
-
- // The unbuffered channel here means we only hear SIGUSR1 if
- // it arrives while we're waiting in select{}.
- sigUSR1 := make(chan os.Signal)
- signal.Notify(sigUSR1, syscall.SIGUSR1)
-
- logger.Printf("starting up: will scan every %v and on SIGUSR1", config.RunPeriod)
-
- for {
- if !runOptions.CommitPulls && !runOptions.CommitTrash {
- logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
- logger.Print("======= Consider using -commit-pulls and -commit-trash flags.")
- }
-
- bal := &Balancer{}
- var err error
- runOptions, err = bal.Run(config, runOptions)
- if err != nil {
- logger.Print("run failed: ", err)
- } else {
- logger.Print("run succeeded")
- }
-
- select {
- case <-stop:
- signal.Stop(sigUSR1)
- return nil
- case <-ticker.C:
- logger.Print("timer went off")
- case <-sigUSR1:
- logger.Print("received SIGUSR1, resetting timer")
- // Reset the timer so we don't start the N+1st
- // run too soon after the Nth run is triggered
- // by SIGUSR1.
- ticker.Stop()
- ticker = time.NewTicker(time.Duration(config.RunPeriod))
- }
- logger.Print("starting next run")
- }
+ })
+
+ return service.Command(arvados.ServiceNameKeepbalance,
+ func(ctx context.Context, cluster *arvados.Cluster, token string, registry *prometheus.Registry) service.Handler {
+ if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("cannot start service: Collections.BalancePeriod is zero (if you want to run once and then exit, use the -once flag)"))
+ }
+
+ ac, err := arvados.NewClientFromConfig(cluster)
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
+ }
+ ac.AuthToken = token
+
+ db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection failed: %s", err))
+ }
+ if p := cluster.PostgreSQL.ConnectionPool; p > 0 {
+ db.SetMaxOpenConns(p)
+ }
+ err = db.Ping()
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection succeeded but ping failed: %s", err))
+ }
+
+ if options.Logger == nil {
+ options.Logger = ctxlog.FromContext(ctx)
+ }
+
+ srv := &Server{
+ Cluster: cluster,
+ ArvClient: ac,
+ RunOptions: options,
+ Metrics: newMetrics(registry),
+ Logger: options.Logger,
+ Dumper: options.Dumper,
+ DB: db,
+ }
+ srv.Handler = &health.Handler{
+ Token: cluster.ManagementToken,
+ Prefix: "/_health/",
+ Routes: health.Routes{"ping": srv.CheckHealth},
+ }
+
+ go srv.run(ctx)
+ return srv
+ }).RunCommand(prog, args, stdin, stdout, stderr)