- // Return a globally unique ID of the underlying storage
- // device if possible, otherwise "".
- GetDeviceID() string
-}
-
-// A VolumeWithExamples is a Volume whose Examples method supplies
-// example configs to display in the -help message.
-type VolumeWithExamples interface {
- Volume
- Examples() []Volume
-}
-
-// A VolumeManager tells callers which mounts can be read, which can be
-// written, and on which mount the next write should be attempted.
-type VolumeManager interface {
- // Mounts returns all mounts (volume attachments).
- Mounts() []*VolumeMount
-
- // Lookup returns the mount with the given UUID. Returns nil
- // if the mount does not exist. If write==true, also returns
- // nil if the mount is not writable.
- Lookup(uuid string, write bool) *VolumeMount
-
- // AllReadable returns all mounts.
- AllReadable() []*VolumeMount
-
- // AllWritable returns all mounts that aren't known to be in
- // a read-only state. (There is no guarantee that a write to
- // one will succeed, though.)
- AllWritable() []*VolumeMount
-
- // NextWritable returns the volume where the next new block
- // should be written, e.g. chosen to spread activity or fill disks.
- // NOTE(review): RRVolumeManager.NextWritable returns []*VolumeMount,
- // so it does not satisfy this signature -- confirm which is intended.
- NextWritable() *VolumeMount
-
- // VolumeStats returns the ioStats used for tracking stats for
- // the given Volume.
- VolumeStats(Volume) *ioStats
-
- // Close shuts down the volume manager cleanly.
- Close()
-}
-
-// A VolumeMount is an attachment of a Volume to a VolumeManager.
-type VolumeMount struct {
- arvados.KeepMount // mount attributes: UUID, DeviceID, ReadOnly, Replication, StorageClasses
- Volume // the attached storage volume
-}
-
-// Generate a UUID the way API server would for a "KeepVolumeMount"
-// object.
-func (*VolumeMount) generateUUID() string {
- var max big.Int
- _, ok := max.SetString("zzzzzzzzzzzzzzz", 36)
- if !ok {
- panic("big.Int parse failed")
- }
- r, err := rand.Int(rand.Reader, &max)
- if err != nil {
- panic(err)
- }
- return fmt.Sprintf("zzzzz-ivpuk-%015s", r.Text(36))
-}
-
-// RRVolumeManager is a round-robin volume manager: each NextWritable
-// call returns the writable mounts rotated one position further than
-// the previous call (writable means KeepMount.ReadOnly is false).
-type RRVolumeManager struct {
- mounts []*VolumeMount // all mounts, sorted by storage class priority, then UUID
- mountMap map[string]*VolumeMount // mounts indexed by UUID, used by Lookup
- readables []*VolumeMount // same entries as mounts: every mount is added here
- writables []*VolumeMount // mounts whose ReadOnly flag is false, same sort order
- counter uint32 // incremented atomically by NextWritable to pick the rotation offset
- iostats map[Volume]*ioStats // one ioStats per volume, returned by VolumeStats
-}
-
-// makeRRVolumeManager builds an RRVolumeManager from the volumes listed
-// in cluster.Volumes.
-//
-// A volume is skipped when its AccessViaHosts is non-empty and has no
-// entry for myURL. Every other volume is created by the driver found
-// under its Driver name; an unknown driver name or a driver failure
-// aborts construction and returns an error. A mount is read-only if
-// either the volume config or its AccessViaHosts entry for myURL says
-// so. Empty StorageClasses default to {"default": true}, and a
-// Replication below 1 is raised to 1.
-//
-// The mounts, readables and writables lists are returned sorted by the
-// highest priority of any storage class a mount offers (highest
-// first), then by UUID.
-func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, myURL arvados.URL, metrics *volumeMetricsVecs) (*RRVolumeManager, error) {
-	vm := &RRVolumeManager{
-		mountMap: make(map[string]*VolumeMount),
-		iostats:  make(map[Volume]*ioStats),
-	}
-	for uuid, cfgvol := range cluster.Volumes {
-		access, listed := cfgvol.AccessViaHosts[myURL]
-		if !listed && len(cfgvol.AccessViaHosts) > 0 {
-			// This volume is restricted to other hosts.
-			continue
-		}
-		newVolume, known := driver[cfgvol.Driver]
-		if !known {
-			return nil, fmt.Errorf("volume %s: invalid driver %q", uuid, cfgvol.Driver)
-		}
-		vol, err := newVolume(cluster, cfgvol, logger, metrics)
-		if err != nil {
-			return nil, fmt.Errorf("error initializing volume %s: %s", uuid, err)
-		}
-		// When AccessViaHosts is empty, access is the zero value and
-		// only the volume-level flag applies.
-		readOnly := cfgvol.ReadOnly || access.ReadOnly
-		logger.Printf("started volume %s (%s), ReadOnly=%v", uuid, vol, readOnly)
-
-		classes := cfgvol.StorageClasses
-		if len(classes) == 0 {
-			classes = map[string]bool{"default": true}
-		}
-		replication := cfgvol.Replication
-		if replication < 1 {
-			replication = 1
-		}
-		mnt := &VolumeMount{
-			KeepMount: arvados.KeepMount{
-				UUID:           uuid,
-				DeviceID:       vol.GetDeviceID(),
-				ReadOnly:       readOnly,
-				Replication:    replication,
-				StorageClasses: classes,
-			},
-			Volume: vol,
-		}
-		vm.iostats[vol] = &ioStats{}
-		vm.mountMap[uuid] = mnt
-		vm.mounts = append(vm.mounts, mnt)
-		vm.readables = append(vm.readables, mnt)
-		if !readOnly {
-			vm.writables = append(vm.writables, mnt)
-		}
-	}
-
-	// pri(mnt): highest priority among the storage classes offered
-	// by mnt (0 if it offers none).
-	pri := func(mnt *VolumeMount) int {
-		best, seen := 0, false
-		for class := range mnt.KeepMount.StorageClasses {
-			p := cluster.StorageClasses[class].Priority
-			if !seen || p > best {
-				best, seen = p, true
-			}
-		}
-		return best
-	}
-	// less(a,b): order by priority (highest first), breaking ties
-	// by volume UUID.
-	less := func(a, b *VolumeMount) bool {
-		pa, pb := pri(a), pri(b)
-		if pa != pb {
-			return pa > pb
-		}
-		return a.KeepMount.UUID < b.KeepMount.UUID
-	}
-	// Each list is sorted in place; the loop variable shares its
-	// backing array with the corresponding vm field.
-	for _, list := range [][]*VolumeMount{vm.readables, vm.writables, vm.mounts} {
-		sort.Slice(list, func(i, j int) bool {
-			return less(list[i], list[j])
-		})
-	}
-	return vm, nil
-}
-
-// Mounts returns all mounts, sorted by priority/uuid.
-func (vm *RRVolumeManager) Mounts() []*VolumeMount {
- return vm.mounts
-}
-
-// Lookup returns the mount registered under uuid, or nil if there is
-// none. When needWrite is true, a mount whose ReadOnly flag is set is
-// also reported as nil.
-func (vm *RRVolumeManager) Lookup(uuid string, needWrite bool) *VolumeMount {
-	mnt, found := vm.mountMap[uuid]
-	if !found {
-		return nil
-	}
-	if needWrite && mnt.ReadOnly {
-		return nil
-	}
-	return mnt
-}
-
-// AllReadable returns every mount, sorted by priority/uuid (all mounts are kept as readable).
-func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
- return vm.readables
-}
-
-// AllWritable returns the mounts not marked ReadOnly, sorted by
-// priority/uuid. Used by CompareAndTouch to ensure higher-priority
-// volumes are checked first.
-func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
- return vm.writables
-}
-
-// NextWritable returns writable volumes, rotated by vm.counter so
-// each volume gets a turn to be first. Used by PutBlock to distribute
-// new data across available volumes.
-//
-// The result is a freshly allocated slice, so callers may modify it
-// without affecting the manager. It returns nil when there are no
-// writable volumes.
-func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
-	n := len(vm.writables)
-	if n == 0 {
-		return nil
-	}
-	// Do the subtraction and modulo in uint32. Converting to int first
-	// makes the offset negative when the counter wraps around to 0
-	// (and, on 32-bit platforms, once it passes 2^31), which would
-	// make the slice expressions below panic.
-	offset := int((atomic.AddUint32(&vm.counter, 1) - 1) % uint32(n))
-	rotated := make([]*VolumeMount, 0, n)
-	rotated = append(rotated, vm.writables[offset:]...)
-	return append(rotated, vm.writables[:offset]...)
-}
-
-// VolumeStats returns an ioStats for the given volume.
-func (vm *RRVolumeManager) VolumeStats(v Volume) *ioStats {
- return vm.iostats[v]