X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/15488bac7de5fa73c2695589c6436a6848615e84..ba57b6537679889b42693ecd839a94d59c716aaf:/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go?ds=sidebyside diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go index f28d4c2826..e768b509cd 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go @@ -105,7 +105,7 @@ func doMain() error { PollInterval: time.Duration(theConfig.PollPeriod), DoneProcessing: make(chan struct{})} - if _, err := daemon.SdNotify("READY=1"); err != nil { + if _, err := daemon.SdNotify(false, "READY=1"); err != nil { log.Printf("Error notifying init daemon: %v", err) } @@ -195,6 +195,7 @@ func submit(dispatcher *dispatch.Dispatcher, b, _ := ioutil.ReadAll(stdoutReader) stdoutReader.Close() stdoutChan <- b + close(stdoutChan) }() stderrChan := make(chan []byte) @@ -202,6 +203,7 @@ func submit(dispatcher *dispatch.Dispatcher, b, _ := ioutil.ReadAll(stderrReader) stderrReader.Close() stderrChan <- b + close(stderrChan) }() // Send a tiny script on stdin to execute the crunch-run command @@ -209,13 +211,10 @@ func submit(dispatcher *dispatch.Dispatcher, io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID))) stdinWriter.Close() - err = cmd.Wait() - stdoutMsg := <-stdoutChan stderrmsg := <-stderrChan - close(stdoutChan) - close(stderrChan) + err = cmd.Wait() if err != nil { submitErr = fmt.Errorf("Container submission failed: %v: %v (stderr: %q)", cmd.Args, err, stderrmsg) @@ -302,12 +301,13 @@ func run(dispatcher *dispatch.Dispatcher, // Mutex between squeue sync and running sbatch or scancel. squeueUpdater.SlurmLock.Lock() - err := scancelCmd(container).Run() + cmd := scancelCmd(container) + msg, err := cmd.CombinedOutput() squeueUpdater.SlurmLock.Unlock() if err != nil { - log.Printf("Error stopping container %s with scancel: %v", - container.UUID, err) + log.Printf("Error stopping container %s with %v %v: %v %v", + container.UUID, cmd.Path, cmd.Args, err, string(msg)) if squeueUpdater.CheckSqueue(container.UUID) { log.Printf("Container %s is still in squeue after scancel.", container.UUID)