15148: Include list of affected PDHs in LostBlocksFile.
authorTom Clegg <tclegg@veritasgenetics.com>
Fri, 26 Apr 2019 20:06:39 +0000 (16:06 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 26 Apr 2019 20:06:39 +0000 (16:06 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/keep-balance/balance.go
services/keep-balance/balance_run_test.go
services/keep-balance/block_state.go

index fc5812cde34ae25cf38d5d5dd5c88f6864cd78b2..08a6c5881c51e32951d475ec49b37a833f360532 100644 (file)
@@ -450,7 +450,13 @@ func (bal *Balancer) addCollection(coll arvados.Collection) error {
                repl = *coll.ReplicationDesired
        }
        debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
-       bal.BlockStateMap.IncreaseDesired(coll.StorageClassesDesired, repl, blkids)
+       // Pass pdh to IncreaseDesired only if LostBlocksFile is being
+       // written -- otherwise it's just a waste of memory.
+       pdh := ""
+       if bal.LostBlocksFile != "" {
+               pdh = coll.PortableDataHash
+       }
+       bal.BlockStateMap.IncreaseDesired(pdh, coll.StorageClassesDesired, repl, blkids)
        return nil
 }
 
@@ -924,7 +930,11 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) {
                        s.lost.replicas -= surplus
                        s.lost.blocks++
                        s.lost.bytes += bytes * int64(-surplus)
-                       fmt.Fprintf(bal.lostBlocks, "%s\n", strings.SplitN(string(result.blkid), "+", 2)[0])
+                       fmt.Fprintf(bal.lostBlocks, "%s", strings.SplitN(string(result.blkid), "+", 2)[0])
+                       for pdh := range result.blk.Refs {
+                               fmt.Fprintf(bal.lostBlocks, " %s", pdh)
+                       }
+                       fmt.Fprint(bal.lostBlocks, "\n")
                case surplus < 0:
                        s.underrep.replicas -= surplus
                        s.underrep.blocks++
index ee7aeb9c867de3139a30ba50d9c6b67309c20cf0..db530bc4926de88502132730f35c816ec3cf92b6 100644 (file)
@@ -452,7 +452,7 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
        c.Check(err, check.IsNil)
        lost, err := ioutil.ReadFile(lostf.Name())
        c.Assert(err, check.IsNil)
-       c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2\n")
+       c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
 }
 
 func (s *runSuite) TestDryRun(c *check.C) {
index 46e69059c9c796c5b23318f8f9b78b4f3c83651e..d9338d0f9b9ef7ce9f4970fb131445c4ce508415 100644 (file)
@@ -23,6 +23,7 @@ type Replica struct {
 // replicas actually stored (according to the keepstore indexes we
 // know about).
 type BlockState struct {
+       Refs     map[string]bool // pdh => true (only tracked when len(Replicas)==0)
        RefCount int
        Replicas []Replica
        Desired  map[string]int
@@ -40,9 +41,21 @@ var defaultClasses = []string{"default"}
 
 func (bs *BlockState) addReplica(r Replica) {
        bs.Replicas = append(bs.Replicas, r)
+       // Free up memory wasted by tracking PDHs that will never be
+       // reported (see comment in increaseDesired)
+       bs.Refs = nil
 }
 
-func (bs *BlockState) increaseDesired(classes []string, n int) {
+func (bs *BlockState) increaseDesired(pdh string, classes []string, n int) {
+       if pdh != "" && len(bs.Replicas) == 0 {
+               // Note we only track PDHs if there's a possibility
+               // that we will report the list of referring PDHs,
+               // i.e., if we haven't yet seen a replica.
+               if bs.Refs == nil {
+                       bs.Refs = map[string]bool{}
+               }
+               bs.Refs[pdh] = true
+       }
        bs.RefCount++
        if len(classes) == 0 {
                classes = defaultClasses
@@ -109,11 +122,14 @@ func (bsm *BlockStateMap) AddReplicas(mnt *KeepMount, idx []arvados.KeepServiceI
 
 // IncreaseDesired updates the map to indicate the desired replication
 // for the given blocks in the given storage class is at least n.
-func (bsm *BlockStateMap) IncreaseDesired(classes []string, n int, blocks []arvados.SizedDigest) {
+//
+// If pdh is non-empty, it will be tracked and reported in the "lost
+// blocks" report.
+func (bsm *BlockStateMap) IncreaseDesired(pdh string, classes []string, n int, blocks []arvados.SizedDigest) {
        bsm.mutex.Lock()
        defer bsm.mutex.Unlock()
 
        for _, blkid := range blocks {
-               bsm.get(blkid).increaseDesired(classes, n)
+               bsm.get(blkid).increaseDesired(pdh, classes, n)
        }
 }