X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aa3efa4aa2749e9c20f6b889ce5968b84db283ba..7ebe828a435dcaa1b5668b72adbaad495059f211:/services/keepstore/unix_volume.go diff --git a/services/keepstore/unix_volume.go b/services/keepstore/unix_volume.go index 46f4db4095..dd62cf1319 100644 --- a/services/keepstore/unix_volume.go +++ b/services/keepstore/unix_volume.go @@ -321,7 +321,12 @@ func (v *UnixVolume) Status() *VolumeStatus { v.logger.WithError(err).Error("stat failed") return nil } - devnum := fi.Sys().(*syscall.Stat_t).Dev + // uint64() cast here supports GOOS=darwin where Dev is + // int32. If the device number is negative, the unsigned + // devnum won't be the real device number any more, but that's + // fine -- all we care about is getting the same number each + // time. + devnum := uint64(fi.Sys().(*syscall.Stat_t).Dev) var fs syscall.Statfs_t if err := syscall.Statfs(v.Root, &fs); err != nil { @@ -379,23 +384,25 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error { continue } blockdirpath := filepath.Join(v.Root, subdir) - blockdir, err := v.os.Open(blockdirpath) - if err != nil { - v.logger.WithError(err).Errorf("error reading %q", blockdirpath) - return fmt.Errorf("error reading %q: %s", blockdirpath, err) - } - v.os.stats.TickOps("readdir") - v.os.stats.Tick(&v.os.stats.ReaddirOps) - // ReadDir() (compared to Readdir(), which returns - // FileInfo structs) helps complete the sequence of - // readdirent calls as quickly as possible, reducing - // the likelihood of NFS EBADCOOKIE (523) errors. - dirents, err := blockdir.ReadDir(-1) - blockdir.Close() - if err != nil { - v.logger.WithError(err).Errorf("error reading %q", blockdirpath) - return fmt.Errorf("error reading %q: %s", blockdirpath, err) + + var dirents []os.DirEntry + for attempt := 0; ; attempt++ { + v.os.stats.TickOps("readdir") + v.os.stats.Tick(&v.os.stats.ReaddirOps) + dirents, err = os.ReadDir(blockdirpath) + if err == nil { + break + } else if attempt < 5 && strings.Contains(err.Error(), "errno 523") { + // EBADCOOKIE (NFS stopped accepting + // our readdirent cookie) -- retry a + // few times before giving up + v.logger.WithError(err).Printf("retry after error reading %s", blockdirpath) + continue + } else { + return err + } } + for _, dirent := range dirents { fileInfo, err := dirent.Info() if os.IsNotExist(err) {