37ed06369bbf139a8e89e7bd420035b8bd6ae7ee
[arvados.git] / services / keep-balance / main.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "context"
9         "flag"
10         "fmt"
11         "io"
12         "net/http"
13         _ "net/http/pprof"
14         "os"
15
16         "git.arvados.org/arvados.git/lib/config"
17         "git.arvados.org/arvados.git/lib/service"
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/ctxlog"
20         "git.arvados.org/arvados.git/sdk/go/health"
21         "github.com/jmoiron/sqlx"
22         _ "github.com/lib/pq"
23         "github.com/prometheus/client_golang/prometheus"
24         "github.com/sirupsen/logrus"
25 )
26
27 func main() {
28         os.Exit(runCommand(os.Args[0], os.Args[1:], os.Stdin, os.Stdout, os.Stderr))
29 }
30
31 func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
32         logger := ctxlog.FromContext(context.Background())
33
34         var options RunOptions
35         flags := flag.NewFlagSet(prog, flag.ContinueOnError)
36         flags.BoolVar(&options.Once, "once", false,
37                 "balance once and then exit")
38         flags.BoolVar(&options.CommitPulls, "commit-pulls", false,
39                 "send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
40         flags.BoolVar(&options.CommitTrash, "commit-trash", false,
41                 "send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
42         flags.BoolVar(&options.CommitConfirmedFields, "commit-confirmed-fields", true,
43                 "update collection fields (replicas_confirmed, storage_classes_confirmed, etc.)")
44         dumpFlag := flags.Bool("dump", false, "dump details for each block to stdout")
45         pprofAddr := flags.String("pprof", "", "serve Go profile data at `[addr]:port`")
46         // "show version" is implemented by service.Command, so we
47         // don't need the var here -- we just need the -version flag
48         // to pass flags.Parse().
49         flags.Bool("version", false, "Write version information to stdout and exit 0")
50
51         if *pprofAddr != "" {
52                 go func() {
53                         logrus.Println(http.ListenAndServe(*pprofAddr, nil))
54                 }()
55         }
56
57         loader := config.NewLoader(os.Stdin, logger)
58         loader.SetupFlags(flags)
59
60         munged := loader.MungeLegacyConfigArgs(logger, args, "-legacy-keepbalance-config")
61         err := flags.Parse(munged)
62         if err == flag.ErrHelp {
63                 return 0
64         } else if err != nil {
65                 logger.Errorf("error parsing command line flags: %s", err)
66                 return 2
67         } else if flags.NArg() != 0 {
68                 logger.Errorf("unrecognized command line arguments: %v", flags.Args())
69                 return 2
70         }
71
72         if *dumpFlag {
73                 dumper := logrus.New()
74                 dumper.Out = os.Stdout
75                 dumper.Formatter = &logrus.TextFormatter{}
76                 options.Dumper = dumper
77         }
78
79         // Drop our custom args that would be rejected by the generic
80         // service.Command
81         args = nil
82         dropFlag := map[string]bool{
83                 "once":                    true,
84                 "commit-pulls":            true,
85                 "commit-trash":            true,
86                 "commit-confirmed-fields": true,
87                 "dump":                    true,
88         }
89         flags.Visit(func(f *flag.Flag) {
90                 if !dropFlag[f.Name] {
91                         args = append(args, "-"+f.Name+"="+f.Value.String())
92                 }
93         })
94
95         return service.Command(arvados.ServiceNameKeepbalance,
96                 func(ctx context.Context, cluster *arvados.Cluster, token string, registry *prometheus.Registry) service.Handler {
97                         if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) {
98                                 return service.ErrorHandler(ctx, cluster, fmt.Errorf("cannot start service: Collections.BalancePeriod is zero (if you want to run once and then exit, use the -once flag)"))
99                         }
100
101                         ac, err := arvados.NewClientFromConfig(cluster)
102                         ac.AuthToken = token
103                         if err != nil {
104                                 return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
105                         }
106
107                         db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
108                         if err != nil {
109                                 return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection failed: %s", err))
110                         }
111                         if p := cluster.PostgreSQL.ConnectionPool; p > 0 {
112                                 db.SetMaxOpenConns(p)
113                         }
114                         err = db.Ping()
115                         if err != nil {
116                                 return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection succeeded but ping failed: %s", err))
117                         }
118
119                         if options.Logger == nil {
120                                 options.Logger = ctxlog.FromContext(ctx)
121                         }
122
123                         srv := &Server{
124                                 Cluster:    cluster,
125                                 ArvClient:  ac,
126                                 RunOptions: options,
127                                 Metrics:    newMetrics(registry),
128                                 Logger:     options.Logger,
129                                 Dumper:     options.Dumper,
130                                 DB:         db,
131                         }
132                         srv.Handler = &health.Handler{
133                                 Token:  cluster.ManagementToken,
134                                 Prefix: "/_health/",
135                                 Routes: health.Routes{"ping": srv.CheckHealth},
136                         }
137
138                         go srv.run()
139                         return srv
140                 }).RunCommand(prog, args, stdin, stdout, stderr)
141 }