X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/10bfcf75847ada2dee77a276b4cc912b43acc936..09cbdc3074b3f1e69c9c537875146f6da0a6ed8f:/services/keep-balance/balance.go diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go index bb590e13b3..9f581751d9 100644 --- a/services/keep-balance/balance.go +++ b/services/keep-balance/balance.go @@ -2,12 +2,13 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepbalance import ( "bytes" "context" "crypto/md5" + "errors" "fmt" "io" "io/ioutil" @@ -22,7 +23,9 @@ import ( "syscall" "time" + "git.arvados.org/arvados.git/lib/controller/dblock" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/jmoiron/sqlx" "github.com/sirupsen/logrus" @@ -66,16 +69,19 @@ type Balancer struct { // subsequent balance operation. // // Run should only be called once on a given Balancer object. -// -// Typical usage: -// -// runOptions, err = (&Balancer{}).Run(config, runOptions) -func (bal *Balancer) Run(client *arvados.Client, cluster *arvados.Cluster, runOptions RunOptions) (nextRunOptions RunOptions, err error) { +func (bal *Balancer) Run(ctx context.Context, client *arvados.Client, cluster *arvados.Cluster, runOptions RunOptions) (nextRunOptions RunOptions, err error) { nextRunOptions = runOptions + ctxlog.FromContext(ctx).Info("acquiring active lock") + if !dblock.KeepBalanceActive.Lock(ctx, func(context.Context) (*sqlx.DB, error) { return bal.DB, nil }) { + // context canceled + return + } + defer dblock.KeepBalanceActive.Unlock() + defer bal.time("sweep", "wall clock time to run one full sweep")() - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(cluster.Collections.BalanceTimeout.Duration())) + ctx, cancel := context.WithDeadline(ctx, time.Now().Add(cluster.Collections.BalanceTimeout.Duration())) defer cancel() var lbFile *os.File @@ -217,8 +223,8 @@ func (bal *Balancer) cleanupMounts() { rwdev := map[string]*KeepService{} for _, srv := range bal.KeepServices { for _, mnt := range srv.mounts { - if !mnt.ReadOnly && mnt.DeviceID != "" { - rwdev[mnt.DeviceID] = srv + if !mnt.ReadOnly { + rwdev[mnt.UUID] = srv } } } @@ -227,8 +233,8 @@ func (bal *Balancer) cleanupMounts() { for _, srv := range bal.KeepServices { var dedup []*KeepMount for _, mnt := range srv.mounts { - if mnt.ReadOnly && rwdev[mnt.DeviceID] != nil { - bal.logf("skipping srv %s readonly mount %q because same device %q is mounted read-write on srv %s", srv, mnt.UUID, mnt.DeviceID, rwdev[mnt.DeviceID]) + if mnt.ReadOnly && rwdev[mnt.UUID] != nil { + bal.logf("skipping srv %s readonly mount %q because same volume is mounted read-write on srv %s", srv, mnt.UUID, rwdev[mnt.UUID]) } else { dedup = append(dedup, mnt) } @@ -266,6 +272,29 @@ func (bal *Balancer) CheckSanityEarly(c *arvados.Client) error { } } + mountProblem := false + type deviceMount struct { + srv *KeepService + mnt *KeepMount + } + deviceMounted := map[string]deviceMount{} // DeviceID -> mount + for _, srv := range bal.KeepServices { + for _, mnt := range srv.mounts { + if first, dup := deviceMounted[mnt.DeviceID]; dup && first.mnt.UUID != mnt.UUID && mnt.DeviceID != "" { + bal.logf("config error: device %s is mounted with multiple volume UUIDs: %s on %s, and %s on %s", + mnt.DeviceID, + first.mnt.UUID, first.srv, + mnt.UUID, srv) + mountProblem = true + continue + } + deviceMounted[mnt.DeviceID] = deviceMount{srv, mnt} + } + } + if mountProblem { + return errors.New("cannot continue with config errors (see above)") + } + var checkPage arvados.CollectionList if err = c.RequestAndDecode(&checkPage, "GET", "arvados/v1/collections", nil, arvados.ResourceListParams{ Limit: new(int), @@ -357,12 +386,10 @@ func (bal *Balancer) GetCurrentState(ctx context.Context, c *arvados.Client, pag deviceMount := map[string]*KeepMount{} for _, srv := range bal.KeepServices { for _, mnt := range srv.mounts { - equiv := deviceMount[mnt.DeviceID] + equiv := deviceMount[mnt.UUID] if equiv == nil { equiv = mnt - if mnt.DeviceID != "" { - deviceMount[mnt.DeviceID] = equiv - } + deviceMount[mnt.UUID] = equiv } equivMount[equiv] = append(equivMount[equiv], mnt) } @@ -667,7 +694,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba // new/remaining replicas uniformly // across qualifying mounts on a given // server. - return rendezvousLess(si.mnt.DeviceID, sj.mnt.DeviceID, blkid) + return rendezvousLess(si.mnt.UUID, sj.mnt.UUID, blkid) } }) @@ -692,7 +719,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba // and returns true if all requirements are met. trySlot := func(i int) bool { slot := slots[i] - if wantMnt[slot.mnt] || wantDev[slot.mnt.DeviceID] { + if wantMnt[slot.mnt] || wantDev[slot.mnt.UUID] { // Already allocated a replica to this // backend device, possibly on a // different server. @@ -707,9 +734,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba slots[i].want = true wantSrv[slot.mnt.KeepService] = true wantMnt[slot.mnt] = true - if slot.mnt.DeviceID != "" { - wantDev[slot.mnt.DeviceID] = true - } + wantDev[slot.mnt.UUID] = true replWant += slot.mnt.Replication } return replProt >= desired && replWant >= desired @@ -751,7 +776,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba // haven't already been added to unsafeToDelete // because the servers report different Mtimes. for _, slot := range slots { - if slot.repl != nil && wantDev[slot.mnt.DeviceID] { + if slot.repl != nil && wantDev[slot.mnt.UUID] { unsafeToDelete[slot.repl.Mtime] = true } } @@ -834,7 +859,7 @@ func computeBlockState(slots []slot, onlyCount map[*KeepMount]bool, have, needRe if onlyCount != nil && !onlyCount[slot.mnt] { continue } - if countedDev[slot.mnt.DeviceID] { + if countedDev[slot.mnt.UUID] { continue } switch { @@ -848,9 +873,7 @@ func computeBlockState(slots []slot, onlyCount map[*KeepMount]bool, have, needRe bbs.pulling++ repl += slot.mnt.Replication } - if slot.mnt.DeviceID != "" { - countedDev[slot.mnt.DeviceID] = true - } + countedDev[slot.mnt.UUID] = true } if repl < needRepl { bbs.unachievable = true