Merge branch '17040-slow-query' refs #17040
[arvados.git] / lib / crunchrun / background.go
index bf039afa0ad53799183607fe9795b5556f615bad..da536107947187e3e88f1b59800a8d217666ca00 100644 (file)
@@ -162,7 +162,7 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
        return nil
 }
 
-// List UUIDs of active crunch-run processes.
+// ListProcesses lists UUIDs of active crunch-run processes.
 func ListProcesses(stdout, stderr io.Writer) int {
        // filepath.Walk does not follow symlinks, so we must walk
        // lockdir+"/." in case lockdir itself is a symlink.
@@ -218,6 +218,24 @@ func ListProcesses(stdout, stderr io.Writer) int {
                        return nil
                }
 
+               proc, err := os.FindProcess(pi.PID)
+               if err != nil {
+                       // FindProcess should have succeeded, even if the
+                       // process does not exist.
+                       fmt.Fprintf(stderr, "%s: find process %d: %s", path, pi.PID, err)
+                       return nil
+               }
+               err = proc.Signal(syscall.Signal(0))
+               if err != nil {
+                       // Process is dead, even though lockfile was
+                       // still locked. Most likely a stuck arv-mount
+                       // process that inherited the lock from
+                       // crunch-run. Report container UUID as
+                       // "stale".
+                       fmt.Fprintln(stdout, pi.UUID, "stale")
+                       return nil
+               }
+
                fmt.Fprintln(stdout, pi.UUID)
                return nil
        }))