X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3facf89bf048487ee718fe15d012b489f2d407b7..8a27fe370239ecb8e50d53f46b45ed61203a35ca:/services/keepstore/volume.go?ds=sidebyside diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go index 4d8a0aec7a..c3b8cd6283 100644 --- a/services/keepstore/volume.go +++ b/services/keepstore/volume.go @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepstore import ( "context" @@ -10,6 +10,7 @@ import ( "fmt" "io" "math/big" + "sort" "sync/atomic" "time" @@ -315,7 +316,7 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my if err != nil { return nil, fmt.Errorf("error initializing volume %s: %s", uuid, err) } - logger.Printf("started volume %s (%s), ReadOnly=%v", uuid, vol, cfgvol.ReadOnly) + logger.Printf("started volume %s (%s), ReadOnly=%v", uuid, vol, cfgvol.ReadOnly || va.ReadOnly) sc := cfgvol.StorageClasses if len(sc) == 0 { @@ -343,6 +344,36 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my vm.writables = append(vm.writables, mnt) } } + // pri(mnt): return highest priority of any storage class + // offered by mnt + pri := func(mnt *VolumeMount) int { + any, best := false, 0 + for class := range mnt.KeepMount.StorageClasses { + if p := cluster.StorageClasses[class].Priority; !any || best < p { + best = p + any = true + } + } + return best + } + // less(a,b): sort first by highest priority of any offered + // storage class (highest->lowest), then by volume UUID + less := func(a, b *VolumeMount) bool { + if pa, pb := pri(a), pri(b); pa != pb { + return pa > pb + } else { + return a.KeepMount.UUID < b.KeepMount.UUID + } + } + sort.Slice(vm.readables, func(i, j int) bool { + return less(vm.readables[i], vm.readables[j]) + }) + sort.Slice(vm.writables, func(i, j int) bool { + return less(vm.writables[i], vm.writables[j]) + }) + sort.Slice(vm.mounts, func(i, j int) bool { + return less(vm.mounts[i], vm.mounts[j]) + }) return vm, nil } @@ -362,18 +393,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount { return vm.readables } -// AllWritable returns an array of all writable volumes +// AllWritable returns writable volumes, sorted by priority/uuid. Used +// by CompareAndTouch to ensure higher-priority volumes are checked +// first. func (vm *RRVolumeManager) AllWritable() []*VolumeMount { return vm.writables } -// NextWritable returns the next writable -func (vm *RRVolumeManager) NextWritable() *VolumeMount { +// NextWritable returns writable volumes, rotated by vm.counter so +// each volume gets a turn to be first. Used by PutBlock to distribute +// new data across available volumes. +func (vm *RRVolumeManager) NextWritable() []*VolumeMount { if len(vm.writables) == 0 { return nil } - i := atomic.AddUint32(&vm.counter, 1) - return vm.writables[i%uint32(len(vm.writables))] + offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables) + return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...) } // VolumeStats returns an ioStats for the given volume.