Merge branch '18547-use-volume-uuid-not-device-id'
author Tom Clegg <tom@curii.com>
Tue, 7 Dec 2021 20:07:30 +0000 (15:07 -0500)
committer Tom Clegg <tom@curii.com>
Tue, 7 Dec 2021 20:07:30 +0000 (15:07 -0500)
fixes #18547

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/controller/dblock/dblock.go
services/keepstore/unix_volume.go

index b0d348870b180adc42aeadc48ba8110e38f380a7..1a36822d5b7f91e81c5b0deb167a105a962b3dfb 100644 (file)
@@ -35,8 +35,8 @@ func (dbl *DBLocker) Lock(ctx context.Context, getdb func(context.Context) (*sql
        for ; ; time.Sleep(retryDelay) {
                dbl.mtx.Lock()
                if dbl.conn != nil {
-                       // Already locked by another caller in this
-                       // process. Wait for them to release.
+                       // Another goroutine is already locked/waiting
+                       // on this lock. Wait for them to release.
                        dbl.mtx.Unlock()
                        continue
                }
@@ -52,9 +52,15 @@ func (dbl *DBLocker) Lock(ctx context.Context, getdb func(context.Context) (*sql
                        dbl.mtx.Unlock()
                        continue
                }
-               _, err = conn.ExecContext(ctx, `SELECT pg_advisory_lock($1)`, dbl.key)
+               var locked bool
+               err = conn.QueryRowContext(ctx, `SELECT pg_try_advisory_lock($1)`, dbl.key).Scan(&locked)
                if err != nil {
-                       logger.WithError(err).Infof("error getting pg_advisory_lock %d", dbl.key)
+                       logger.WithError(err).Infof("error getting pg_try_advisory_lock %d", dbl.key)
+                       conn.Close()
+                       dbl.mtx.Unlock()
+                       continue
+               }
+               if !locked {
                        conn.Close()
                        dbl.mtx.Unlock()
                        continue
index 46f4db4095bfb286c82f4f07a988c16cee5ebe63..a053ba3e6b19042c48423cf31303e8cc2059fa1d 100644 (file)
@@ -379,23 +379,25 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
                        continue
                }
                blockdirpath := filepath.Join(v.Root, subdir)
-               blockdir, err := v.os.Open(blockdirpath)
-               if err != nil {
-                       v.logger.WithError(err).Errorf("error reading %q", blockdirpath)
-                       return fmt.Errorf("error reading %q: %s", blockdirpath, err)
-               }
-               v.os.stats.TickOps("readdir")
-               v.os.stats.Tick(&v.os.stats.ReaddirOps)
-               // ReadDir() (compared to Readdir(), which returns
-               // FileInfo structs) helps complete the sequence of
-               // readdirent calls as quickly as possible, reducing
-               // the likelihood of NFS EBADCOOKIE (523) errors.
-               dirents, err := blockdir.ReadDir(-1)
-               blockdir.Close()
-               if err != nil {
-                       v.logger.WithError(err).Errorf("error reading %q", blockdirpath)
-                       return fmt.Errorf("error reading %q: %s", blockdirpath, err)
+
+               var dirents []os.DirEntry
+               for attempt := 0; ; attempt++ {
+                       v.os.stats.TickOps("readdir")
+                       v.os.stats.Tick(&v.os.stats.ReaddirOps)
+                       dirents, err = os.ReadDir(blockdirpath)
+                       if err == nil {
+                               break
+                       } else if attempt < 5 && strings.Contains(err.Error(), "errno 523") {
+                               // EBADCOOKIE (NFS stopped accepting
+                               // our readdirent cookie) -- retry a
+                               // few times before giving up
+                               v.logger.WithError(err).Printf("retry after error reading %s", blockdirpath)
+                               continue
+                       } else {
+                               return err
+                       }
                }
+
                for _, dirent := range dirents {
                        fileInfo, err := dirent.Info()
                        if os.IsNotExist(err) {