From 683f5374b0fc516579c1d6dc3379fc900d642322 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Wed, 25 Jan 2017 16:37:26 -0500 Subject: [PATCH] 10703: Un-pyramid run(). --- .../crunch-dispatch-slurm.go | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go index 19ab5aa2f7..8e61462809 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go @@ -250,29 +250,30 @@ func run(dispatcher *dispatch.Dispatcher, go monitorSubmitOrCancel(dispatcher, container, &monitorDone) for container = range status { - if container.State == dispatch.Locked || container.State == dispatch.Running { - if container.Priority == 0 { - log.Printf("Canceling container %s", container.UUID) - - // Mutex between squeue sync and running sbatch or scancel. - squeueUpdater.SlurmLock.Lock() - cmd := scancelCmd(container) - msg, err := cmd.CombinedOutput() - squeueUpdater.SlurmLock.Unlock() - - if err != nil { - log.Printf("Error stopping container %s with %v %v: %v %v", - container.UUID, cmd.Path, cmd.Args, err, string(msg)) - if squeueUpdater.CheckSqueue(container.UUID) { - log.Printf("Container %s is still in squeue after scancel.", - container.UUID) - continue - } - } - - err = dispatcher.UpdateState(container.UUID, dispatch.Cancelled) + if !(container.State == dispatch.Locked || container.State == dispatch.Running) { + continue + } + if container.Priority != 0 { + continue + } + log.Printf("Canceling container %s", container.UUID) + + // Mutex between squeue sync and running sbatch or scancel. + squeueUpdater.SlurmLock.Lock() + cmd := scancelCmd(container) + msg, err := cmd.CombinedOutput() + squeueUpdater.SlurmLock.Unlock() + + if err != nil { + log.Printf("Error stopping container %s with %v %v: %v %v", container.UUID, cmd.Path, cmd.Args, err, string(msg)) + if squeueUpdater.CheckSqueue(container.UUID) { + log.Printf("Container %s is still in squeue after scancel.", container.UUID) + continue } } + + // Ignore errors; if necessary, we'll try again next time + dispatcher.UpdateState(container.UUID, dispatch.Cancelled) } monitorDone = true } -- 2.30.2