+ // If pull/trash turns out to be
+ // needed, distribute the
+ // new/remaining replicas uniformly
+ // across qualifying mounts on a given
+ // server.
+ return rendezvousLess(si.mnt.DeviceID, sj.mnt.DeviceID, blkid)
+ }
+ })
+
+ // Servers/mounts/devices (with or without existing
+ // replicas) that are part of the best achievable
+ // layout for this storage class.
+ wantSrv := map[*KeepService]bool{}
+ wantMnt := map[*KeepMount]bool{}
+ // wantDev is keyed by mount DeviceID, so two mounts
+ // exposing the same backend device count as a single
+ // replica target (see the wantDev checks in trySlot).
+ wantDev := map[string]bool{}
+ // Positions (with existing replicas) that have been
+ // protected (via unsafeToDelete) to ensure we don't
+ // reduce replication below desired level when
+ // trashing replicas that aren't optimal positions for
+ // any storage class.
+ protMnt := map[*KeepMount]bool{}
+ // Replication planned so far (corresponds to wantMnt).
+ replWant := 0
+ // Protected replication (corresponds to protMnt).
+ replProt := 0
+
+ // trySlot tries using a slot to meet requirements,
+ // and returns true if all requirements are met.
+ // It does two independent things: (a) protect an
+ // existing replica from being trashed (tallied in
+ // replProt) and (b) mark the slot as a wanted
+ // position (tallied in replWant). The tallies are
+ // kept separately because an existing replica can be
+ // worth protecting even when better positions exist.
+ trySlot := func(i int) bool {
+ slot := slots[i]
+ if wantMnt[slot.mnt] || wantDev[slot.mnt.DeviceID] {
+ // Already allocated a replica to this
+ // backend device, possibly on a
+ // different server.
+ return false
+ }
+ if replProt < desired && slot.repl != nil && !protMnt[slot.mnt] {
+ // Still short of desired protected
+ // replication: keep this existing replica
+ // from being trashed.
+ unsafeToDelete[slot.repl.Mtime] = true
+ protMnt[slot.mnt] = true
+ replProt += slot.mnt.Replication
+ }
+ if replWant < desired && (slot.repl != nil || !slot.mnt.ReadOnly) {
+ // Usable position: either a replica already
+ // exists here, or the mount is writable so
+ // one can be pulled here.
+ slots[i].want = true
+ wantSrv[slot.mnt.KeepService] = true
+ wantMnt[slot.mnt] = true
+ if slot.mnt.DeviceID != "" {
+ wantDev[slot.mnt.DeviceID] = true
+ }
+ replWant += slot.mnt.Replication
+ }
+ return replProt >= desired && replWant >= desired
+ }
+
+ // First try to achieve desired replication without
+ // using the same server twice.
+ done := false
+ for i := 0; i < len(slots) && !done; i++ {
+ if !wantSrv[slots[i].mnt.KeepService] {
+ done = trySlot(i)
+ }
+ }
+
+ // If that didn't suffice, do another pass without the
+ // "distinct services" restriction. (Achieving the
+ // desired volume replication on fewer than the
+ // desired number of services is better than
+ // underreplicating.)
+ // Slots already claimed in the first pass are skipped
+ // by trySlot's wantMnt/wantDev check, so re-walking
+ // from the start is safe.
+ for i := 0; i < len(slots) && !done; i++ {
+ done = trySlot(i)
+ }
+
+ if !underreplicated {
+ // Sum the replication provided by existing
+ // replicas on mounts that qualify for this
+ // class; stop as soon as the desired level is
+ // reached.
+ safe := 0
+ for _, slot := range slots {
+ if slot.repl == nil || !bal.mountsByClass[class][slot.mnt] {
+ continue
+ }
+ if safe += slot.mnt.Replication; safe >= desired {
+ break
+ }
+ }
+ underreplicated = safe < desired
+ }
+
+ // Set the unachievable flag if there aren't enough
+ // slots offering the relevant storage class. (This is
+ // as easy as checking slots[desired] because we
+ // already sorted the qualifying slots to the front.)
+ // classState holds structs by value, so read-modify-
+ // write the entry rather than mutating in place.
+ if desired >= len(slots) || !bal.mountsByClass[class][slots[desired].mnt] {
+ cs := classState[class]
+ cs.unachievable = true
+ classState[class] = cs
+ }
+
+ // Avoid deleting wanted replicas from devices that
+ // are mounted on multiple servers -- even if they
+ // haven't already been added to unsafeToDelete
+ // because the servers report different Mtimes.
+ for _, slot := range slots {
+ if slot.repl != nil && wantDev[slot.mnt.DeviceID] {
+ unsafeToDelete[slot.repl.Mtime] = true