projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
21204: Merge branch '21204-stable-log-sort' from arvados-workbench2.git
[arvados.git]
/
services
/
keep-balance
/
server.go
diff --git a/services/keep-balance/server.go b/services/keep-balance/server.go
index b6806d552a89d750d2fbb51a8dce4faa70903b3e..480791ffa2637da8f282fb28507dbbcb046bcfbf 100644
(file)
--- a/services/keep-balance/server.go
+++ b/services/keep-balance/server.go
@@ -2,16 +2,19 @@
//
// SPDX-License-Identifier: AGPL-3.0
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package keepbalance
import (
import (
+ "context"
"net/http"
"os"
"os/signal"
"syscall"
"time"
"net/http"
"os"
"os/signal"
"syscall"
"time"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/lib/controller/dblock"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/jmoiron/sqlx"
"github.com/sirupsen/logrus"
)
"github.com/sirupsen/logrus"
)
@@ -23,11 +26,11 @@ import (
//
// RunOptions fields are controlled by command line flags.
type RunOptions struct {
//
// RunOptions fields are controlled by command line flags.
type RunOptions struct {
- Once bool
- CommitPulls bool
- CommitTrash bool
- Logger logrus.FieldLogger
- Dumper logrus.FieldLogger
+ Once bool
+ CommitConfirmedFields bool
+ ChunkPrefix string
+ Logger logrus.FieldLogger
+ Dumper logrus.FieldLogger
// SafeRendezvousState from the most recent balance operation,
// or "" if unknown. If this changes from one run to the next,
// SafeRendezvousState from the most recent balance operation,
// or "" if unknown. If this changes from one run to the next,
@@ -46,19 +49,26 @@ type Server struct {
Logger logrus.FieldLogger
Dumper logrus.FieldLogger
Logger logrus.FieldLogger
Dumper logrus.FieldLogger
+
+ DB *sqlx.DB
}
// CheckHealth implements service.Handler.
func (srv *Server) CheckHealth() error {
}
// CheckHealth implements service.Handler.
func (srv *Server) CheckHealth() error {
+ return srv.DB.Ping()
+}
+
+// Done implements service.Handler.
+func (srv *Server) Done() <-chan struct{} {
return nil
}
return nil
}
-func (srv *Server) run() {
+func (srv *Server) run(ctx context.Context) {
var err error
if srv.RunOptions.Once {
var err error
if srv.RunOptions.Once {
- _, err = srv.runOnce()
+ _, err = srv.runOnce(ctx)
} else {
} else {
- err = srv.runForever(nil)
+ err = srv.runForever(ctx)
}
if err != nil {
srv.Logger.Error(err)
}
if err != nil {
srv.Logger.Error(err)
@@ -68,39 +78,46 @@ func (srv *Server) run() {
}
}
}
}
-func (srv *Server) runOnce() (*Balancer, error) {
+func (srv *Server) runOnce(ctx context.Context) (*Balancer, error) {
bal := &Balancer{
bal := &Balancer{
+ DB: srv.DB,
Logger: srv.Logger,
Dumper: srv.Dumper,
Metrics: srv.Metrics,
LostBlocksFile: srv.Cluster.Collections.BlobMissingReport,
Logger: srv.Logger,
Dumper: srv.Dumper,
Metrics: srv.Metrics,
LostBlocksFile: srv.Cluster.Collections.BlobMissingReport,
+ ChunkPrefix: srv.RunOptions.ChunkPrefix,
}
var err error
}
var err error
- srv.RunOptions, err = bal.Run(srv.ArvClient, srv.Cluster, srv.RunOptions)
+ srv.RunOptions, err = bal.Run(ctx, srv.ArvClient, srv.Cluster, srv.RunOptions)
return bal, err
}
return bal, err
}
-// RunForever runs forever, or (for testing purposes) until the given
-// stop channel is ready to receive.
-func (srv *Server) runForever(stop <-chan interface{}) error {
+// RunForever runs forever, or until ctx is cancelled.
+func (srv *Server) runForever(ctx context.Context) error {
logger := srv.Logger
ticker := time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
logger := srv.Logger
ticker := time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
- // The unbuffered channel here means we only hear SIGUSR1 if
- // it arrives while we're waiting in select{}.
- sigUSR1 := make(chan os.Signal)
+ sigUSR1 := make(chan os.Signal, 1)
signal.Notify(sigUSR1, syscall.SIGUSR1)
signal.Notify(sigUSR1, syscall.SIGUSR1)
+ logger.Info("acquiring service lock")
+ dblock.KeepBalanceService.Lock(ctx, func(context.Context) (*sqlx.DB, error) { return srv.DB, nil })
+ defer dblock.KeepBalanceService.Unlock()
+
logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod)
for {
logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod)
for {
- if !srv.RunOptions.CommitPulls && !srv.RunOptions.CommitTrash {
+ if srv.Cluster.Collections.BalancePullLimit < 1 && srv.Cluster.Collections.BalanceTrashLimit < 1 {
logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
- logger.Print("======= Consider using -commit-pulls and -commit-trash flags.")
+ logger.Print("======= To commit changes, set BalancePullLimit and BalanceTrashLimit values greater than zero.")
}
}
- _, err := srv.runOnce()
+ if !dblock.KeepBalanceService.Check() {
+ // context canceled
+ return nil
+ }
+ _, err := srv.runOnce(ctx)
if err != nil {
logger.Print("run failed: ", err)
} else {
if err != nil {
logger.Print("run failed: ", err)
} else {
@@ -108,7 +125,7 @@ func (srv *Server) runForever(stop <-chan interface{}) error {
}
select {
}
select {
- case <-stop:
+ case <-ctx.Done():
signal.Stop(sigUSR1)
return nil
case <-ticker.C:
signal.Stop(sigUSR1)
return nil
case <-ticker.C: