"math"
"os"
"runtime"
+ "sort"
"strings"
"sync"
"time"
}
// Run performs a balance operation using the given config and
-// runOptions. It should only be called once on a given Balancer
-// object. Typical usage:
+// runOptions, and returns RunOptions suitable for passing to a
+// subsequent balance operation.
//
-// err = (&Balancer{}).Run(config, runOptions)
-func (bal *Balancer) Run(config Config, runOptions RunOptions) (err error) {
+// Run should only be called once on a given Balancer object.
+//
+// Typical usage:
+//
+// runOptions, err = (&Balancer{}).Run(config, runOptions)
+func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions RunOptions, err error) {
+ nextRunOptions = runOptions
+
bal.Dumper = runOptions.Dumper
bal.Logger = runOptions.Logger
if bal.Logger == nil {
if err = bal.CheckSanityEarly(&config.Client); err != nil {
return
}
- if runOptions.CommitTrash {
+ rs := bal.rendezvousState()
+ if runOptions.CommitTrash && rs != runOptions.SafeRendezvousState {
+ if runOptions.SafeRendezvousState != "" {
+ bal.logf("notice: KeepServices list has changed since last run")
+ }
+ bal.logf("clearing existing trash lists, in case the new rendezvous order differs from previous run")
if err = bal.ClearTrashLists(&config.Client); err != nil {
return
}
+ // The current rendezvous state becomes "safe" (i.e.,
+ // OK to compute changes for that state without
+ // clearing existing trash lists) only now, after we
+ // succeed in clearing existing trash lists.
+ nextRunOptions.SafeRendezvousState = rs
}
if err = bal.GetCurrentState(&config.Client, config.CollectionBatchSize, config.CollectionBuffers); err != nil {
return
return nil
}
+// rendezvousState returns a fingerprint (e.g., a sorted list of
+// UUID+host+port) of the current set of keep services.
+func (bal *Balancer) rendezvousState() string {
+ srvs := make([]string, 0, len(bal.KeepServices))
+ for _, srv := range bal.KeepServices {
+ srvs = append(srvs, srv.String())
+ }
+ sort.Strings(srvs)
+ return strings.Join(srvs, "; ")
+}
+
// ClearTrashLists sends an empty trash list to each keep
// service. Calling this before GetCurrentState avoids races.
//
s.stub.serveKeepstoreIndexFoo4Bar1()
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
- err := (&Balancer{}).Run(s.config, opts)
+ _, err := (&Balancer{}).Run(s.config, opts)
c.Check(err, check.ErrorMatches, "received zero collections")
c.Check(trashReqs.Count(), check.Equals, 4)
c.Check(pullReqs.Count(), check.Equals, 0)
s.stub.serveFourDiskKeepServices()
indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
trashReqs := s.stub.serveKeepstoreTrash()
- err := (&Balancer{}).Run(s.config, opts)
+ _, err := (&Balancer{}).Run(s.config, opts)
c.Check(err, check.IsNil)
c.Check(indexReqs.Count(), check.Equals, 0)
c.Check(trashReqs.Count(), check.Equals, 0)
s.stub.serveFourDiskKeepServices()
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
- err := (&Balancer{}).Run(s.config, opts)
+ _, err := (&Balancer{}).Run(s.config, opts)
c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
c.Check(trashReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
s.stub.serveKeepstoreIndexFoo4Bar1()
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
- err := (&Balancer{}).Run(s.config, opts)
+ _, err := (&Balancer{}).Run(s.config, opts)
c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
c.Check(trashReqs.Count(), check.Equals, 4)
c.Check(pullReqs.Count(), check.Equals, 0)
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
var bal Balancer
- err := bal.Run(s.config, opts)
+ _, err := bal.Run(s.config, opts)
c.Check(err, check.IsNil)
c.Check(trashReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
var bal Balancer
- err := bal.Run(s.config, opts)
+ _, err := bal.Run(s.config, opts)
c.Check(err, check.IsNil)
c.Check(trashReqs.Count(), check.Equals, 8)
c.Check(pullReqs.Count(), check.Equals, 4)
s.config.RunPeriod = arvados.Duration(time.Millisecond)
go RunForever(s.config, opts, stop)
- // Each run should send 4 clear trash lists + 4 pull lists + 4
- // trash lists. We should complete four runs in much less than
+ // Each run should send 4 pull lists + 4 trash lists. The
+ // first run should also send 4 empty trash lists at
+ // startup. We should complete all four runs in much less than
// a second.
for t0 := time.Now(); pullReqs.Count() < 16 && time.Since(t0) < 10*time.Second; {
time.Sleep(time.Millisecond)
}
stop <- true
c.Check(pullReqs.Count() >= 16, check.Equals, true)
- c.Check(trashReqs.Count(), check.Equals, 2*pullReqs.Count())
+ c.Check(trashReqs.Count(), check.Equals, pullReqs.Count() + 4)
}
CommitTrash bool
Logger *log.Logger
Dumper *log.Logger
+
+ // SafeRendezvousState from the most recent balance operation,
+ // or "" if unknown. If this changes from one run to the next,
+ // we need to watch out for races. See
+ // (*Balancer)ClearTrashLists.
+ SafeRendezvousState string
}
var debugf = func(string, ...interface{}) {}
if err != nil {
// (don't run)
} else if runOptions.Once {
- err = (&Balancer{}).Run(config, runOptions)
+ _, err = (&Balancer{}).Run(config, runOptions)
} else {
err = RunForever(config, runOptions, nil)
}
logger.Print("======= Consider using -commit-pulls and -commit-trash flags.")
}
- err := (&Balancer{}).Run(config, runOptions)
+ bal := &Balancer{}
+ var err error
+ runOptions, err = bal.Run(config, runOptions)
if err != nil {
logger.Print("run failed: ", err)
} else {