13 "git.curoverse.com/arvados.git/sdk/go/arvados"
16 // Config specifies site configuration, like API credentials and the
17 // choice of which servers are to be balanced.
19 // Config is loaded from a JSON config file (see usage()).
21 // Arvados API endpoint and credentials.
24 // List of service types (e.g., "disk") to balance.
25 KeepServiceTypes []string
27 KeepServiceList arvados.KeepServiceList
30 RunPeriod arvados.Duration
32 // Number of collections to request in each API call.
33 CollectionBatchSize int
35 // Max collections to buffer in memory (bigger values consume
36 // more memory, but can reduce store-and-forward latency when
41 // RunOptions controls runtime behavior. The flags/options that belong
42 // here are the ones that are useful for interactive use. For example,
43 // "CommitTrash" is a runtime option rather than a config item because
44 // it invokes a troubleshooting feature rather than expressing how
45 // balancing is meant to be done at a given site.
47 // RunOptions fields are controlled by command line flags.
48 type RunOptions struct {
// debugf is a printf-style hook: it takes a format string followed by
// variadic arguments. The function assigned here has an empty body, so
// calls made while this value is in place do nothing.
// NOTE(review): presumably replaced with a real logging function when the
// -debug flag defined in this file is set; that assignment is not visible
// in this excerpt — confirm.
56 var debugf = func(string, ...interface{}) {}
60 var runOptions RunOptions
62 configPath := flag.String("config", "",
63 "`path` of json configuration file")
64 serviceListPath := flag.String("config.KeepServiceList", "",
65 "`path` of json file with list of keep services to balance, as given by \"arv keep_service list\" "+
66 "(default: config[\"KeepServiceList\"], or if none given, get all available services and filter by config[\"KeepServiceTypes\"])")
67 flag.BoolVar(&runOptions.Once, "once", false,
68 "balance once and then exit")
69 flag.BoolVar(&runOptions.CommitPulls, "commit-pulls", false,
70 "send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
71 flag.BoolVar(&runOptions.CommitTrash, "commit-trash", false,
72 "send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
73 dumpFlag := flag.Bool("dump", false, "dump details for each block to stdout")
74 debugFlag := flag.Bool("debug", false, "enable debug messages")
78 if *configPath == "" {
79 log.Fatal("You must specify a config file (see `keep-balance -help`)")
81 mustReadJSON(&config, *configPath)
82 if *serviceListPath != "" {
83 mustReadJSON(&config.KeepServiceList, *serviceListPath)
88 if j, err := json.Marshal(config); err != nil {
91 log.Printf("config is %s", j)
95 runOptions.Dumper = log.New(os.Stdout, "", log.LstdFlags)
97 err := CheckConfig(config, runOptions)
100 } else if runOptions.Once {
101 err = (&Balancer{}).Run(config, runOptions)
103 err = RunForever(config, runOptions, nil)
// mustReadJSON loads the file at path and decodes its JSON content into dst.
//
// dst is handed to json.Unmarshal unchanged, so it must be a non-nil pointer
// to the value that should be populated.
//
// Nothing is returned. If the file cannot be read, or its content cannot be
// decoded into dst, the failure is reported through log.Fatalf, which
// terminates the process.
func mustReadJSON(dst interface{}, path string) {
	buf, err := ioutil.ReadFile(path)
	if err != nil {
		log.Fatalf("Reading %q: %v", path, err)
	}
	// log.Fatalf does not return, so reaching this point means buf holds
	// the complete file content.
	if err := json.Unmarshal(buf, dst); err != nil {
		log.Fatalf("Decoding %q: %v", path, err)
	}
}
118 // RunForever runs forever, or (for testing purposes) until a value
119 // can be received from the given stop channel.
120 func RunForever(config Config, runOptions RunOptions, stop <-chan interface{}) error {
121 if runOptions.Logger == nil {
122 runOptions.Logger = log.New(os.Stderr, "", log.LstdFlags)
124 logger := runOptions.Logger
126 ticker := time.NewTicker(time.Duration(config.RunPeriod))
128 // The unbuffered channel here means we only hear SIGUSR1 if
129 // it arrives while we're waiting in select{}.
130 sigUSR1 := make(chan os.Signal)
131 signal.Notify(sigUSR1, syscall.SIGUSR1)
133 logger.Printf("starting up: will scan every %v and on SIGUSR1", config.RunPeriod)
136 if !runOptions.CommitPulls && !runOptions.CommitTrash {
137 logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
138 logger.Print("======= Consider using -commit-pulls and -commit-trash flags.")
141 err := (&Balancer{}).Run(config, runOptions)
143 logger.Print("run failed: ", err)
145 logger.Print("run succeeded")
153 logger.Print("timer went off")
155 logger.Print("received SIGUSR1, resetting timer")
156 // Reset the timer so we don't start the N+1st
157 // run too soon after the Nth run is triggered
160 ticker = time.NewTicker(time.Duration(config.RunPeriod))
162 logger.Print("starting next run")