1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
16 "git.curoverse.com/arvados.git/sdk/go/auth"
17 "git.curoverse.com/arvados.git/sdk/go/httpserver"
18 "github.com/sirupsen/logrus"
24 defaultConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
25 rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
28 // Config specifies site configuration, like API credentials and the
29 // choice of which servers are to be balanced.
31 // Config is loaded from a YAML (or JSON) config file (see usage()).
33 // Arvados API endpoint and credentials.
36 // List of service types (e.g., "disk") to balance.
37 KeepServiceTypes []string
39 KeepServiceList arvados.KeepServiceList
41 // address, address:port, or :port for management interface
44 // token for management APIs
45 ManagementToken string
48 RunPeriod arvados.Duration
50 // Number of collections to request in each API call
51 CollectionBatchSize int
53 // Max collections to buffer in memory (bigger values consume
54 // more memory, but can reduce store-and-forward latency when
58 // Timeout for outgoing http request/response cycle.
59 RequestTimeout arvados.Duration
61 // Destination filename for the list of lost block hashes, one
62 // per line. Updated atomically during each successful run.
66 // RunOptions controls runtime behavior. The flags/options that belong
67 // here are the ones that are useful for interactive use. For example,
68 // "CommitTrash" is a runtime option rather than a config item because
69 // it invokes a troubleshooting feature rather than expressing how
70 // balancing is meant to be done at a given site.
72 // RunOptions fields are controlled by command line flags.
73 type RunOptions struct {
77 Logger logrus.FieldLogger
78 Dumper logrus.FieldLogger
80 // SafeRendezvousState from the most recent balance operation,
81 // or "" if unknown. If this changes from one run to the next,
82 // we need to watch out for races. See
83 // (*Balancer)ClearTrashLists.
84 SafeRendezvousState string
91 listening string // for tests
93 Logger logrus.FieldLogger
94 Dumper logrus.FieldLogger
97 // NewServer returns a new Server that runs Balancers using the given
98 // config and runOptions.
99 func NewServer(config Config, runOptions RunOptions) (*Server, error) {
100 if len(config.KeepServiceList.Items) > 0 && config.KeepServiceTypes != nil {
101 return nil, fmt.Errorf("cannot specify both KeepServiceList and KeepServiceTypes in config")
103 if !runOptions.Once && config.RunPeriod == arvados.Duration(0) {
104 return nil, fmt.Errorf("you must either use the -once flag, or specify RunPeriod in config")
107 if runOptions.Logger == nil {
109 log.Formatter = &logrus.JSONFormatter{
110 TimestampFormat: rfc3339NanoFixed,
113 runOptions.Logger = log
118 runOptions: runOptions,
119 metrics: newMetrics(),
120 Logger: runOptions.Logger,
121 Dumper: runOptions.Dumper,
123 return srv, srv.start()
126 func (srv *Server) start() error {
127 if srv.config.Listen == "" {
130 server := &httpserver.Server{
132 Handler: httpserver.LogRequests(srv.Logger,
133 auth.RequireLiteralToken(srv.config.ManagementToken,
134 srv.metrics.Handler(srv.Logger))),
136 Addr: srv.config.Listen,
138 err := server.Start()
142 srv.Logger.Printf("listening at %s", server.Addr)
143 srv.listening = server.Addr
147 func (srv *Server) Run() (*Balancer, error) {
151 Metrics: srv.metrics,
152 LostBlocksFile: srv.config.LostBlocksFile,
155 srv.runOptions, err = bal.Run(srv.config, srv.runOptions)
159 // RunForever runs forever, or (for testing purposes) until a value
160 // can be received from the given stop channel.
161 func (srv *Server) RunForever(stop <-chan interface{}) error {
162 logger := srv.runOptions.Logger
164 ticker := time.NewTicker(time.Duration(srv.config.RunPeriod))
166 // signal.Notify never blocks on send, so with this unbuffered channel
167 // we only hear SIGUSR1 if it arrives while we're waiting in select{}.
168 sigUSR1 := make(chan os.Signal)
169 signal.Notify(sigUSR1, syscall.SIGUSR1)
171 logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.config.RunPeriod)
174 if !srv.runOptions.CommitPulls && !srv.runOptions.CommitTrash {
175 logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
176 logger.Print("======= Consider using -commit-pulls and -commit-trash flags.")
181 logger.Print("run failed: ", err)
183 logger.Print("run succeeded")
191 logger.Print("timer went off")
193 logger.Print("received SIGUSR1, resetting timer")
194 // Reset the timer so we don't start the N+1st
195 // run too soon after the Nth run is triggered
198 ticker = time.NewTicker(time.Duration(srv.config.RunPeriod))
200 logger.Print("starting next run")