19414: Fix concurrent map read/write. 19414-keep-balance-panic
author: Tom Clegg <tom@curii.com>
Tue, 23 Aug 2022 15:02:04 +0000 (11:02 -0400)
committer: Tom Clegg <tom@curii.com>
Tue, 23 Aug 2022 15:02:04 +0000 (11:02 -0400)
Occurred when a block was referenced by a collection but not returned
by any keepstore index, and NumCPU > 2.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

services/keep-balance/block_state.go
services/keep-balance/block_state_test.go

index 07c9952f906d7e57dfca21d0007c01d96f42294b..63a994096bc8c4e1c6f974592c5ee4cf2b6f364f 100644 (file)
@@ -151,7 +151,11 @@ func (bsm *BlockStateMap) GetConfirmedReplication(blkids []arvados.SizedDigest,
                for _, c := range classes {
                        perclass[c] = 0
                }
-               for _, r := range bsm.get(blkid).Replicas {
+               bs, ok := bsm.entries[blkid]
+               if !ok {
+                       return 0
+               }
+               for _, r := range bs.Replicas {
                        total += r.KeepMount.Replication
                        mntclasses := r.KeepMount.StorageClasses
                        if len(mntclasses) == 0 {
index 8a58be288ff1832a3799224510d04d5d581d2455..c6076bbd3d526c144849cd8890832e08c7df3b65 100644 (file)
@@ -5,6 +5,7 @@
 package keepbalance
 
 import (
+       "sync"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
@@ -92,3 +93,25 @@ func (s *confirmedReplicationSuite) TestBlocksOnMultipleMounts(c *check.C) {
        n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, nil)
        c.Check(n, check.Equals, 4)
 }
+
+func (s *confirmedReplicationSuite) TestConcurrency(c *check.C) { // TestConcurrency calls GetConfirmedReplication from many goroutines at once (regression test for the concurrent map read/write named in the commit message).
+       var wg sync.WaitGroup
+       for i := 1000; i < 1256; i++ { // 256 iterations, each starting two goroutines
+               i := i // per-iteration copy so the closure below does not share the loop variable (matters before Go 1.22)
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(i), knownBlkid(i)}, []string{"default"}) // same blkid passed twice; presumably absent from the fixture's block map -- confirm against the suite setup
+                       c.Check(n, check.Equals, 0)
+               }()
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(10)}, []string{"default"}) // presumably a block the suite setup registered; expected replication 1
+                       c.Check(n, check.Equals, 1)
+                       n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(20)}, []string{"default"}) // likewise, expected replication 2
+                       c.Check(n, check.Equals, 2)
+               }()
+       }
+       wg.Wait() // block until every goroutine started above has called wg.Done
+}