From 9f4f850c2818cbf5eea45e30d27b13c20bd2be0c Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 23 Mar 2023 14:36:55 -0400 Subject: [PATCH] 20242: Trash only one when identical replicas are eligible to trash. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- services/keep-balance/balance.go | 46 ++++++++++++++++++++++----- services/keep-balance/balance_test.go | 29 +++++++++++++++++ 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go index 33c907c203..215c5e1b5b 100644 --- a/services/keep-balance/balance.go +++ b/services/keep-balance/balance.go @@ -829,19 +829,49 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba } blockState := computeBlockState(slots, nil, len(blk.Replicas), 0) - var lost bool - var changes []string + // Sort the slots by rendezvous order. This ensures "trash the + // first of N replicas with identical timestamps" is + // predictable (helpful for testing) and well distributed + // across servers. + sort.Slice(slots, func(i, j int) bool { + si, sj := slots[i], slots[j] + if orderi, orderj := srvRendezvous[si.mnt.KeepService], srvRendezvous[sj.mnt.KeepService]; orderi != orderj { + return orderi < orderj + } else { + return rendezvousLess(si.mnt.UUID, sj.mnt.UUID, blkid) + } + }) + + var ( + lost bool + changes []string + trashedMtime = make(map[int64]bool, len(slots)) + ) for _, slot := range slots { // TODO: request a Touch if Mtime is duplicated. var change int switch { case !slot.want && slot.repl != nil && slot.repl.Mtime < bal.MinMtime: - slot.mnt.KeepService.AddTrash(Trash{ - SizedDigest: blkid, - Mtime: slot.repl.Mtime, - From: slot.mnt, - }) - change = changeTrash + if trashedMtime[slot.repl.Mtime] { + // Don't trash multiple replicas with + // identical timestamps. If they are + // multiple views of the same backing + // storage, asking both servers to + // trash is redundant and can cause + // races (see #20242). If they are + // distinct replicas that happen to + // have identical timestamps, we'll + // get this one on the next sweep. + change = changeNone + } else { + slot.mnt.KeepService.AddTrash(Trash{ + SizedDigest: blkid, + Mtime: slot.repl.Mtime, + From: slot.mnt, + }) + change = changeTrash + trashedMtime[slot.repl.Mtime] = true + } case slot.repl == nil && slot.want && len(blk.Replicas) == 0: lost = true change = changeNone diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go index 6626609b57..f9fca1431b 100644 --- a/services/keep-balance/balance_test.go +++ b/services/keep-balance/balance_test.go @@ -321,6 +321,35 @@ func (bal *balancerSuite) TestDecreaseReplTimestampCollision(c *check.C) { desired: map[string]int{"default": 2}, current: slots{0, 1, 2}, timestamps: []int64{12345678, 10000000, 10000000}}) + bal.try(c, tester{ + desired: map[string]int{"default": 0}, + current: slots{0, 1, 2}, + timestamps: []int64{12345678, 12345678, 12345678}, + shouldTrash: slots{0}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][0]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 10000000, 10000000, 10000000}, + shouldTrash: slots{2}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 12345671, 10000000, 10000000}, + shouldTrash: slots{2, 5}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][2]].mounts[0].UUID, + bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}}) + bal.try(c, tester{ + desired: map[string]int{"default": 2}, + current: slots{0, 1, 2, 5, 6}, + timestamps: []int64{12345678, 12345679, 12345679, 10000000, 10000000}, + shouldTrash: slots{5}, + shouldTrashMounts: []string{ + bal.srvs[bal.knownRendezvous[0][5]].mounts[0].UUID}}) } func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) { -- 2.30.2