10703: Un-pyramid run().
author Tom Clegg <tom@curoverse.com>
Wed, 25 Jan 2017 21:37:26 +0000 (16:37 -0500)
committer Tom Clegg <tom@curoverse.com>
Wed, 25 Jan 2017 21:52:33 +0000 (16:52 -0500)
services/crunch-dispatch-slurm/crunch-dispatch-slurm.go

index 19ab5aa2f7598efca21293afb3a3194320ea6f4d..8e614628099829714d9d81377f30f54a8eb78724 100644 (file)
@@ -250,29 +250,30 @@ func run(dispatcher *dispatch.Dispatcher,
        go monitorSubmitOrCancel(dispatcher, container, &monitorDone)
 
        for container = range status {
-               if container.State == dispatch.Locked || container.State == dispatch.Running {
-                       if container.Priority == 0 {
-                               log.Printf("Canceling container %s", container.UUID)
-
-                               // Mutex between squeue sync and running sbatch or scancel.
-                               squeueUpdater.SlurmLock.Lock()
-                               cmd := scancelCmd(container)
-                               msg, err := cmd.CombinedOutput()
-                               squeueUpdater.SlurmLock.Unlock()
-
-                               if err != nil {
-                                       log.Printf("Error stopping container %s with %v %v: %v %v",
-                                               container.UUID, cmd.Path, cmd.Args, err, string(msg))
-                                       if squeueUpdater.CheckSqueue(container.UUID) {
-                                               log.Printf("Container %s is still in squeue after scancel.",
-                                                       container.UUID)
-                                               continue
-                                       }
-                               }
-
-                               err = dispatcher.UpdateState(container.UUID, dispatch.Cancelled)
+               if !(container.State == dispatch.Locked || container.State == dispatch.Running) {
+                       continue
+               }
+               if container.Priority != 0 {
+                       continue
+               }
+               log.Printf("Canceling container %s", container.UUID)
+
+               // Mutex between squeue sync and running sbatch or scancel.
+               squeueUpdater.SlurmLock.Lock()
+               cmd := scancelCmd(container)
+               msg, err := cmd.CombinedOutput()
+               squeueUpdater.SlurmLock.Unlock()
+
+               if err != nil {
+                       log.Printf("Error stopping container %s with %v %v: %v %v", container.UUID, cmd.Path, cmd.Args, err, string(msg))
+                       if squeueUpdater.CheckSqueue(container.UUID) {
+                               log.Printf("Container %s is still in squeue after scancel.", container.UUID)
+                               continue
                        }
                }
+
+               // Ignore errors; if necessary, we'll try again next time
+               dispatcher.UpdateState(container.UUID, dispatch.Cancelled)
        }
        monitorDone = true
 }