X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a7a482db3954fa6470be74f0e00f6e1e105e0b6c..0f3ce3e9251fbcdd761a9b531332eb94a10381c0:/services/keepstore/volume.go diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go index 5a277b6007..f597ff5781 100644 --- a/services/keepstore/volume.go +++ b/services/keepstore/volume.go @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: AGPL-3.0 -package main +package keepstore import ( "context" @@ -10,6 +10,7 @@ import ( "fmt" "io" "math/big" + "sort" "sync/atomic" "time" @@ -315,8 +316,6 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my if err != nil { return nil, fmt.Errorf("error initializing volume %s: %s", uuid, err) } - logger.Printf("started volume %s (%s), ReadOnly=%v", uuid, vol, cfgvol.ReadOnly) - sc := cfgvol.StorageClasses if len(sc) == 0 { sc = map[string]bool{"default": true} @@ -329,7 +328,8 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my KeepMount: arvados.KeepMount{ UUID: uuid, DeviceID: vol.GetDeviceID(), - ReadOnly: cfgvol.ReadOnly || va.ReadOnly, + AllowWrite: !va.ReadOnly && !cfgvol.ReadOnly, + AllowTrash: !va.ReadOnly && (!cfgvol.ReadOnly || cfgvol.AllowTrashWhenReadOnly), Replication: repl, StorageClasses: sc, }, @@ -339,10 +339,41 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my vm.mounts = append(vm.mounts, mnt) vm.mountMap[uuid] = mnt vm.readables = append(vm.readables, mnt) - if !mnt.KeepMount.ReadOnly { + if mnt.KeepMount.AllowWrite { vm.writables = append(vm.writables, mnt) } + logger.Printf("started volume %s (%s), AllowWrite=%v, AllowTrash=%v", uuid, vol, mnt.AllowWrite, mnt.AllowTrash) + } + // pri(mnt): return highest priority of any storage class + // offered by mnt + pri := func(mnt *VolumeMount) int { + any, best := false, 0 + for class := range mnt.KeepMount.StorageClasses { + if p := cluster.StorageClasses[class].Priority; !any || best < p { + best = p + any = true + } + } + return best } + // less(a,b): sort first by highest priority of any offered + // storage class (highest->lowest), then by volume UUID + less := func(a, b *VolumeMount) bool { + if pa, pb := pri(a), pri(b); pa != pb { + return pa > pb + } else { + return a.KeepMount.UUID < b.KeepMount.UUID + } + } + sort.Slice(vm.readables, func(i, j int) bool { + return less(vm.readables[i], vm.readables[j]) + }) + sort.Slice(vm.writables, func(i, j int) bool { + return less(vm.writables[i], vm.writables[j]) + }) + sort.Slice(vm.mounts, func(i, j int) bool { + return less(vm.mounts[i], vm.mounts[j]) + }) return vm, nil } @@ -351,11 +382,10 @@ func (vm *RRVolumeManager) Mounts() []*VolumeMount { } func (vm *RRVolumeManager) Lookup(uuid string, needWrite bool) *VolumeMount { - if mnt, ok := vm.mountMap[uuid]; ok && (!needWrite || !mnt.ReadOnly) { + if mnt, ok := vm.mountMap[uuid]; ok && (!needWrite || mnt.AllowWrite) { return mnt - } else { - return nil } + return nil } // AllReadable returns an array of all readable volumes @@ -363,18 +393,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount { return vm.readables } -// AllWritable returns an array of all writable volumes +// AllWritable returns writable volumes, sorted by priority/uuid. Used +// by CompareAndTouch to ensure higher-priority volumes are checked +// first. func (vm *RRVolumeManager) AllWritable() []*VolumeMount { return vm.writables } -// NextWritable returns the next writable -func (vm *RRVolumeManager) NextWritable() *VolumeMount { +// NextWritable returns writable volumes, rotated by vm.counter so +// each volume gets a turn to be first. Used by PutBlock to distribute +// new data across available volumes. +func (vm *RRVolumeManager) NextWritable() []*VolumeMount { if len(vm.writables) == 0 { return nil } - i := atomic.AddUint32(&vm.counter, 1) - return vm.writables[i%uint32(len(vm.writables))] + offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables) + return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...) } // VolumeStats returns an ioStats for the given volume.