From 58266ff0dc0420cd99c4cb024476115a3dd9b5e7 Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Tue, 23 Aug 2022 11:02:04 -0400
Subject: [PATCH] 19414: Fix concurrent map read/write.

Occurred when a block was referenced by a collection but not returned by
any keepstore index, and NumCPU > 2.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---
 services/keep-balance/block_state.go      |  6 +++++-
 services/keep-balance/block_state_test.go | 23 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/services/keep-balance/block_state.go b/services/keep-balance/block_state.go
index 07c9952f90..63a994096b 100644
--- a/services/keep-balance/block_state.go
+++ b/services/keep-balance/block_state.go
@@ -151,7 +151,11 @@ func (bsm *BlockStateMap) GetConfirmedReplication(blkids []arvados.SizedDigest,
 		for _, c := range classes {
 			perclass[c] = 0
 		}
-		for _, r := range bsm.get(blkid).Replicas {
+		bs, ok := bsm.entries[blkid]
+		if !ok {
+			return 0
+		}
+		for _, r := range bs.Replicas {
 			total += r.KeepMount.Replication
 			mntclasses := r.KeepMount.StorageClasses
 			if len(mntclasses) == 0 {
diff --git a/services/keep-balance/block_state_test.go b/services/keep-balance/block_state_test.go
index 8a58be288f..c6076bbd3d 100644
--- a/services/keep-balance/block_state_test.go
+++ b/services/keep-balance/block_state_test.go
@@ -5,6 +5,7 @@
 package keepbalance
 
 import (
+	"sync"
 	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
@@ -92,3 +93,25 @@ func (s *confirmedReplicationSuite) TestBlocksOnMultipleMounts(c *check.C) {
 	n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, nil)
 	c.Check(n, check.Equals, 4)
 }
+
+func (s *confirmedReplicationSuite) TestConcurrency(c *check.C) {
+	var wg sync.WaitGroup
+	for i := 1000; i < 1256; i++ {
+		i := i
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(i), knownBlkid(i)}, []string{"default"})
+			c.Check(n, check.Equals, 0)
+		}()
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(10)}, []string{"default"})
+			c.Check(n, check.Equals, 1)
+			n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(20)}, []string{"default"})
+			c.Check(n, check.Equals, 2)
+		}()
+	}
+	wg.Wait()
+}
-- 
2.30.2