13407: Take keepstore-reported volume replication into account.
author Tom Clegg <tclegg@veritasgenetics.com>
Mon, 11 Jun 2018 19:36:21 +0000 (15:36 -0400)
committer Tom Clegg <tclegg@veritasgenetics.com>
Mon, 11 Jun 2018 19:36:21 +0000 (15:36 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/keep-balance/balance.go
services/keep-balance/balance_test.go

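diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index c7fa88230307e3eb47d06fac094b4768f3c7e2c8..d1c567f4c19b2212bd83de3c3da8a94672eab3e7 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go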
@@ -95,7 +95,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
                        return
                }
        }
-       bal.dedupDevices()
+       bal.cleanupMounts()
 
        if err = bal.CheckSanityEarly(&config.Client); err != nil {
                return
@@ -170,7 +170,7 @@ func (bal *Balancer) DiscoverKeepServices(c *arvados.Client, okTypes []string) e
        })
 }
 
-func (bal *Balancer) dedupDevices() {
+func (bal *Balancer) cleanupMounts() {
        rwdev := map[string]*KeepService{}
        for _, srv := range bal.KeepServices {
                for _, mnt := range srv.mounts {
@@ -192,6 +192,14 @@ func (bal *Balancer) dedupDevices() {
                }
                srv.mounts = dedup
        }
+       for _, srv := range bal.KeepServices {
+               for _, mnt := range srv.mounts {
+                       if mnt.Replication <= 0 {
+                               log.Printf("%s: mount %s reports replication=%d, using replication=1", srv, mnt.UUID, mnt.Replication)
+                               mnt.Replication = 1
+                       }
+               }
+       }
 }
 
 // CheckSanityEarly checks for configuration and runtime errors that
@@ -614,30 +622,36 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
                // trashing replicas that aren't optimal positions for
                // any storage class.
                protMnt := map[*KeepMount]bool{}
+               // Replication planned so far (corresponds to wantMnt).
+               replWant := 0
+               // Protected replication (corresponds to protMnt).
+               replProt := 0
 
                // trySlot tries using a slot to meet requirements,
                // and returns true if all requirements are met.
                trySlot := func(i int) bool {
                        slot := slots[i]
-                       if wantDev[slot.mnt.DeviceID] {
+                       if wantMnt[slot.mnt] || wantDev[slot.mnt.DeviceID] {
                                // Already allocated a replica to this
                                // backend device, possibly on a
                                // different server.
                                return false
                        }
-                       if len(protMnt) < desired && slot.repl != nil {
+                       if replProt < desired && slot.repl != nil && !protMnt[slot.mnt] {
                                unsafeToDelete[slot.repl.Mtime] = true
                                protMnt[slot.mnt] = true
+                               replProt += slot.mnt.Replication
                        }
-                       if len(wantMnt) < desired && (slot.repl != nil || !slot.mnt.ReadOnly) {
+                       if replWant < desired && (slot.repl != nil || !slot.mnt.ReadOnly) {
                                slots[i].want = true
                                wantSrv[slot.mnt.KeepService] = true
                                wantMnt[slot.mnt] = true
                                if slot.mnt.DeviceID != "" {
                                        wantDev[slot.mnt.DeviceID] = true
                                }
+                               replWant += slot.mnt.Replication
                        }
-                       return len(protMnt) >= desired && len(wantMnt) >= desired
+                       return replProt >= desired && replWant >= desired
                }
 
                // First try to achieve desired replication without
@@ -664,7 +678,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
                                if slot.repl == nil || !bal.mountsByClass[class][slot.mnt] {
                                        continue
                                }
-                               if safe++; safe >= desired {
+                               if safe += slot.mnt.Replication; safe >= desired {
                                        break
                                }
                        }
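diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index 8650de141cd245239754e98c474e9783acf6e8f2..4baa7679a354ac81d2c66cd78271b895ece85b0c 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go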
@@ -92,6 +92,7 @@ func (bal *balancerSuite) SetUpTest(c *check.C) {
        }
 
        bal.MinMtime = time.Now().UnixNano() - bal.signatureTTL*1e9
+       bal.cleanupMounts()
 }
 
 func (bal *balancerSuite) TestPerfect(c *check.C) {
@@ -247,12 +248,12 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
                shouldTrash: slots{2}})
 }
 
-func (bal *balancerSuite) TestDedupDevices(c *check.C) {
+func (bal *balancerSuite) TestCleanupMounts(c *check.C) {
        bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
        bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
        bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
        c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
-       bal.dedupDevices()
+       bal.cleanupMounts()
        c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
        bal.try(c, tester{
                known:      0,
@@ -261,6 +262,80 @@ func (bal *balancerSuite) TestDedupDevices(c *check.C) {
                shouldPull: slots{2}})
 }
 
+func (bal *balancerSuite) TestVolumeReplication(c *check.C) {
+       bal.srvs[0].mounts[0].KeepMount.Replication = 2  // srv 0
+       bal.srvs[14].mounts[0].KeepMount.Replication = 2 // srv e
+       bal.cleanupMounts()
+       // block 0 rendezvous is 3,e,a -- so slot 1 has repl=2
+       bal.try(c, tester{
+               known:      0,
+               desired:    map[string]int{"default": 2},
+               current:    slots{1},
+               shouldPull: slots{0}})
+       bal.try(c, tester{
+               known:      0,
+               desired:    map[string]int{"default": 2},
+               current:    slots{0, 1},
+               shouldPull: nil})
+       bal.try(c, tester{
+               known:       0,
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 2},
+               shouldTrash: slots{2}})
+       bal.try(c, tester{
+               known:       0,
+               desired:     map[string]int{"default": 3},
+               current:     slots{0, 2, 3, 4},
+               shouldPull:  slots{1},
+               shouldTrash: slots{4}})
+       bal.try(c, tester{
+               known:       0,
+               desired:     map[string]int{"default": 3},
+               current:     slots{0, 1, 2, 3, 4},
+               shouldTrash: slots{2, 3, 4}})
+       bal.try(c, tester{
+               known:       0,
+               desired:     map[string]int{"default": 4},
+               current:     slots{0, 1, 2, 3, 4},
+               shouldTrash: slots{3, 4}})
+       // block 1 rendezvous is 0,9,7 -- so slot 0 has repl=2
+       bal.try(c, tester{
+               known:   1,
+               desired: map[string]int{"default": 2},
+               current: slots{0}})
+       bal.try(c, tester{
+               known:      1,
+               desired:    map[string]int{"default": 3},
+               current:    slots{0},
+               shouldPull: slots{1}})
+       bal.try(c, tester{
+               known:      1,
+               desired:    map[string]int{"default": 4},
+               current:    slots{0},
+               shouldPull: slots{1, 2}})
+       bal.try(c, tester{
+               known:      1,
+               desired:    map[string]int{"default": 4},
+               current:    slots{2},
+               shouldPull: slots{0, 1}})
+       bal.try(c, tester{
+               known:      1,
+               desired:    map[string]int{"default": 4},
+               current:    slots{7},
+               shouldPull: slots{0, 1, 2}})
+       bal.try(c, tester{
+               known:       1,
+               desired:     map[string]int{"default": 2},
+               current:     slots{1, 2, 3, 4},
+               shouldPull:  slots{0},
+               shouldTrash: slots{3, 4}})
+       bal.try(c, tester{
+               known:       1,
+               desired:     map[string]int{"default": 2},
+               current:     slots{0, 1, 2},
+               shouldTrash: slots{1, 2}})
+}
+
 func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
        bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
        bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"