Merge branch '14328-watch-docker-ps'
[arvados.git] / services / keep-balance / balance.go
index 333a4fbde99b8470ed25fb45503baf2f2d5241a1..e1b207805b58a81837eab51c80f8ce5e5e8df186 100644 (file)
@@ -263,7 +263,7 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
        bal.DefaultReplication = dd.DefaultCollectionReplication
        bal.MinMtime = time.Now().UnixNano() - dd.BlobSignatureTTL*1e9
 
-       errs := make(chan error, 2+len(bal.KeepServices))
+       errs := make(chan error, 1)
        wg := sync.WaitGroup{}
 
        // When a device is mounted more than once, we will get its
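
The old buffer size (2+len(bal.KeepServices)) existed so every goroutine could report an error without blocking; the rewrite keeps only the first error instead. A minimal sketch of the non-blocking "first error wins" send this hunk introduces (illustrative, not part of balance.go): with a capacity-1 channel, the first send succeeds and every later send falls through to default.

    package main

    import "fmt"

    func main() {
            errs := make(chan error, 1) // room for exactly one error
            for i := 0; i < 3; i++ {
                    select {
                    case errs <- fmt.Errorf("error %d", i): // only i==0 lands here
                    default: // channel already full: later errors are dropped
                    }
            }
            fmt.Println(<-errs) // error 0
    }
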
@@ -298,7 +298,10 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
                        bal.logf("mount %s: retrieve index from %s", mounts[0], mounts[0].KeepService)
                        idx, err := mounts[0].KeepService.IndexMount(c, mounts[0].UUID, "")
                        if err != nil {
-                               errs <- fmt.Errorf("%s: retrieve index: %v", mounts[0], err)
+                               select {
+                               case errs <- fmt.Errorf("%s: retrieve index: %v", mounts[0], err):
+                               default:
+                               }
                                return
                        }
                        if len(errs) > 0 {
@@ -308,9 +311,9 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
                                return
                        }
                        for _, mount := range mounts {
-                               bal.logf("%s: add %d replicas to map", mount, len(idx))
+                               bal.logf("%s: add %d entries to map", mount, len(idx))
                                bal.BlockStateMap.AddReplicas(mount, idx)
-                               bal.logf("%s: added %d replicas", mount, len(idx))
+                               bal.logf("%s: added %d entries to map at %dx (%d replicas)", mount, len(idx), mount.Replication, len(idx)*mount.Replication)
                        }
                        bal.logf("mount %s: index done", mounts[0])
                }(mounts)
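
The reworked log line separates raw index entries from the replicas they represent: each entry on a mount counts mount.Replication times. A trivial runnable restatement with hypothetical numbers:

    package main

    import "fmt"

    func main() {
            entries := 1500  // index entries reported by one keepstore mount
            replication := 2 // mount.Replication in the diff
            fmt.Printf("added %d entries to map at %dx (%d replicas)\n",
                    entries, replication, entries*replication)
    }
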
@@ -330,7 +333,10 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
                for coll := range collQ {
                        err := bal.addCollection(coll)
                        if err != nil {
-                               errs <- err
+                               select {
+                               case errs <- err:
+                               default:
+                               }
                                for range collQ {
                                }
                                return
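
When addCollection fails, the bare `for range collQ {}` loop keeps receiving until the producer closes the channel, so the producer's sends never block on a consumer that has given up. A self-contained sketch of that drain pattern (names and payloads illustrative):

    package main

    import "fmt"

    func process(coll int) error {
            return fmt.Errorf("cannot add collection %d", coll)
    }

    func main() {
            collQ := make(chan int)
            done := make(chan struct{})
            go func() { // consumer fails on the first item
                    defer close(done)
                    for coll := range collQ {
                            if err := process(coll); err != nil {
                                    fmt.Println("giving up:", err)
                                    for range collQ { // discard the rest; keeps the producer unblocked
                                    }
                                    return
                            }
                    }
            }()
            for i := 0; i < 5; i++ { // producer
                    collQ <- i
            }
            close(collQ)
            <-done
    }
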
@@ -360,7 +366,10 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
                        })
                close(collQ)
                if err != nil {
-                       errs <- err
+                       select {
+                       case errs <- err:
+                       default:
+                       }
                }
        }()
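
On the receiving side, once wg.Wait() returns no worker can still be sending, so the first error can be harvested with a race-free non-blocking receive. A sketch assuming the capacity-1 errs channel from the hunks above (run and its failing worker are stand-ins, not balance.go code):

    package main

    import (
            "errors"
            "fmt"
            "sync"
    )

    func run() error {
            errs := make(chan error, 1)
            var wg sync.WaitGroup
            for i := 0; i < 3; i++ {
                    wg.Add(1)
                    go func(i int) {
                            defer wg.Done()
                            if i == 1 { // one worker fails
                                    select {
                                    case errs <- errors.New("worker failed"):
                                    default:
                                    }
                            }
                    }(i)
            }
            wg.Wait()
            select { // race-free: all senders have finished
            case err := <-errs:
                    return err
            default:
                    return nil
            }
    }

    func main() {
            fmt.Println(run()) // worker failed
    }
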
 
@@ -520,7 +529,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
                        slots = append(slots, slot{
                                mnt:  mnt,
                                repl: repl,
-                               want: repl != nil && (mnt.ReadOnly || repl.Mtime >= bal.MinMtime),
+                               want: repl != nil && mnt.ReadOnly,
                        })
                }
        }
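
The freshness test is not gone, it moved: before, a replica too new to trash was marked "want" up front; after, "want" covers only read-only mounts, and the Mtime check reappears in the trash decision in the @@ -716 hunk below. An illustrative restatement of the two predicates (assumption: these helpers do not exist in balance.go):

    package main

    import "fmt"

    func wantOld(replExists, readOnly bool, mtime, minMtime int64) bool {
            return replExists && (readOnly || mtime >= minMtime)
    }

    func wantNew(replExists, readOnly bool) bool {
            return replExists && readOnly
    }

    func mayTrash(underreplicated, want bool, mtime, minMtime int64) bool {
            return !underreplicated && !want && mtime < minMtime
    }

    func main() {
            // A fresh replica on a writable mount is no longer "want",
            // but it is still protected: its mtime fails the trash guard.
            fmt.Println(wantOld(true, false, 100, 50))   // true
            fmt.Println(wantNew(true, false))            // false
            fmt.Println(mayTrash(false, false, 100, 50)) // false: too new
    }
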
@@ -568,14 +577,14 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
                                // Prefer a mount that satisfies the
                                // desired class.
                                return bal.mountsByClass[class][si.mnt]
-                       } else if wanti, wantj := si.want, sj.want; wanti != wantj {
+                       } else if si.want != sj.want {
                                // Prefer a mount that will have a
                                // replica no matter what we do here
                                // -- either because it already has an
                                // untrashable replica, or because we
                                // already need it to satisfy a
                                // different storage class.
-                               return slots[i].want
+                               return si.want
                        } else if orderi, orderj := srvRendezvous[si.mnt.KeepService], srvRendezvous[sj.mnt.KeepService]; orderi != orderj {
                                // Prefer a better rendezvous
                                // position.
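
The comparator is a tiered sort: storage-class preference first, then the want flag, then rendezvous order. A stripped-down, runnable analogue of the last two tiers (slot here is a simplified stand-in for the real struct):

    package main

    import (
            "fmt"
            "sort"
    )

    type slot struct {
            want  bool // replica will survive regardless of this block's plan
            order int  // rendezvous position; lower is better
    }

    func main() {
            slots := []slot{{false, 2}, {true, 3}, {false, 1}, {true, 0}}
            sort.Slice(slots, func(i, j int) bool {
                    si, sj := slots[i], slots[j]
                    if si.want != sj.want {
                            return si.want // prefer slots that keep a replica anyway
                    }
                    return si.order < sj.order // then better rendezvous position
            })
            fmt.Println(slots) // [{true 0} {true 3} {false 1} {false 2}]
    }
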
@@ -716,7 +725,7 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
                // TODO: request a Touch if Mtime is duplicated.
                var change int
                switch {
-               case !underreplicated && slot.repl != nil && !slot.want && !unsafeToDelete[slot.repl.Mtime]:
+               case !underreplicated && !slot.want && slot.repl != nil && slot.repl.Mtime < bal.MinMtime && !unsafeToDelete[slot.repl.Mtime]:
                        slot.mnt.KeepService.AddTrash(Trash{
                                SizedDigest: blkid,
                                Mtime:       slot.repl.Mtime,
@@ -780,6 +789,26 @@ type balancerStats struct {
        trashes       int
        replHistogram []int
        classStats    map[string]replicationStats
+
+       // collectionBytes / collectionBlockBytes = deduplication ratio
+       collectionBytes      int64 // sum(bytes in referenced blocks) across all collections
+       collectionBlockBytes int64 // sum(block size) across all blocks referenced by collections
+       collectionBlockRefs  int64 // sum(number of blocks referenced) across all collections
+       collectionBlocks     int64 // number of blocks referenced by any collection
+}
+
+func (s *balancerStats) dedupByteRatio() float64 {
+       if s.collectionBlockBytes == 0 {
+               return 0
+       }
+       return float64(s.collectionBytes) / float64(s.collectionBlockBytes)
+}
+
+func (s *balancerStats) dedupBlockRatio() float64 {
+       if s.collectionBlocks == 0 {
+               return 0
+       }
+       return float64(s.collectionBlockRefs) / float64(s.collectionBlocks)
 }
 
 type replicationStats struct {
@@ -803,6 +832,13 @@ func (bal *Balancer) collectStatistics(results <-chan balanceResult) {
                surplus := result.have - result.want
                bytes := result.blkid.Size()
 
+               if rc := int64(result.blk.RefCount); rc > 0 {
+                       s.collectionBytes += rc * bytes
+                       s.collectionBlockBytes += bytes
+                       s.collectionBlockRefs += rc
+                       s.collectionBlocks++
+               }
+
                for class, state := range result.classState {
                        cs := s.classStats[class]
                        if state.unachievable {
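
A worked example of the new dedup accounting, with hypothetical numbers: three distinct 64 MiB blocks, one referenced by four collections and the other two by one collection each. collectionBytes counts each reference, collectionBlockBytes counts each distinct block once, so dedupByteRatio and dedupBlockRatio both come out to 2:

    package main

    import "fmt"

    func main() {
            const blockSize = int64(64 << 20) // 64 MiB
            refCounts := []int64{4, 1, 1}     // result.blk.RefCount per distinct block

            var collectionBytes, collectionBlockBytes, collectionBlockRefs, collectionBlocks int64
            for _, rc := range refCounts {
                    collectionBytes += rc * blockSize // once per reference
                    collectionBlockBytes += blockSize // once per distinct block
                    collectionBlockRefs += rc
                    collectionBlocks++
            }
            fmt.Println(float64(collectionBytes) / float64(collectionBlockBytes)) // 2 (dedupByteRatio)
            fmt.Println(float64(collectionBlockRefs) / float64(collectionBlocks)) // 2 (dedupBlockRatio)
    }
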