Merge branch '16723-kill-vs-requeue'
author Tom Clegg <tom@tomclegg.ca>
Mon, 31 Aug 2020 14:30:04 +0000 (10:30 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Mon, 31 Aug 2020 14:30:04 +0000 (10:30 -0400)
closes #16723

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

lib/dispatchcloud/scheduler/run_queue.go

index dddb974b326fbe7d61c280148e71f4f5c86e7abe..d77dcee947951953b46da631c0a66ee15894a19f 100644 (file)
@@ -51,6 +51,10 @@ tryrun:
                                overquota = sorted[i:]
                                break tryrun
                        }
+                       if sch.pool.KillContainer(ctr.UUID, "about to lock") {
+                               logger.Info("not locking: crunch-run process from previous attempt has not exited")
+                               continue
+                       }
                        go sch.lockContainer(logger, ctr.UUID)
                        unalloc[it]--
                case arvados.ContainerStateLocked:
@@ -88,7 +92,7 @@ tryrun:
                                // a higher-priority container on the
                                // same instance type. Don't let this
                                // one sneak in ahead of it.
-                       } else if sch.pool.KillContainer(ctr.UUID, "about to lock") {
+                       } else if sch.pool.KillContainer(ctr.UUID, "about to start") {
                                logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
                        } else if sch.pool.StartContainer(it, ctr) {
                                // Success.