X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c3c538444c15e68e96780f157935f2baa4ba0bc5..f6e063ed8afcdd868fe1eec2fa86ac8f0f61e18b:/services/crunch-run/background.go diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go index 3dbfcfcde3..deaf09cec8 100644 --- a/services/crunch-run/background.go +++ b/services/crunch-run/background.go @@ -18,7 +18,7 @@ import ( ) var ( - lockdir = "/var/run" + lockdir = "/var/lock" lockprefix = "crunch-run-" locksuffix = ".lock" ) @@ -32,7 +32,7 @@ type procinfo struct { } // Detach acquires a lock for the given uuid, and starts the current -// program as a child process (with -nodetach prepended to the given +// program as a child process (with -detached prepended to the given // arguments so the child knows not to detach again). The lock is // passed along to the child process. func Detach(uuid string, args []string, stdout, stderr io.Writer) int { @@ -62,10 +62,15 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error { } defer errfile.Close() - cmd := exec.Command(args[0], append([]string{"-nodetach"}, args[1:]...)...) + cmd := exec.Command(args[0], append([]string{"-detached"}, args[1:]...)...) cmd.Stdout = outfile cmd.Stderr = errfile + // Child inherits lockfile. cmd.ExtraFiles = []*os.File{lockfile} + // Ensure child isn't interrupted even if we receive signals + // from parent (sshd) while sending lockfile content to + // caller. + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} err = cmd.Start() if err != nil { os.Remove(outfile.Name()) @@ -87,11 +92,11 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error { return nil } -// Kill finds the crunch-run process corresponding to the given uuid, -// and sends the given signal to it. It then waits up to 1 second for -// the process to die. It returns 0 if the process is successfully -// killed or didn't exist in the first place. -func Kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) int { +// KillProcess finds the crunch-run process corresponding to the given +// uuid, and sends the given signal to it. It then waits up to 1 +// second for the process to die. It returns 0 if the process is +// successfully killed or didn't exist in the first place. +func KillProcess(uuid string, signal syscall.Signal, stdout, stderr io.Writer) int { return exitcode(stderr, kill(uuid, signal, stdout, stderr)) } @@ -127,12 +132,12 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error { if err == nil { return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal) } - fmt.Fprintln(stderr, "pid %d: %s", pi.PID, err) + fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err) return nil } // List UUIDs of active crunch-run processes. -func List(stdout, stderr io.Writer) int { +func ListProcesses(stdout, stderr io.Writer) int { return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error { if info.IsDir() { return filepath.SkipDir @@ -155,7 +160,7 @@ func List(stdout, stderr io.Writer) int { // acquisition during races, e.g., by connecting to a // unix socket or checking /proc/$pid/fd/$n -> // lockfile. - err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH) + err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB) if err == nil { // lockfile is stale err := os.Remove(path)