Merge branch '14482-rubysdk-empty-dir'
[arvados.git] / lib / dispatchcloud / scheduler / sync.go
index a85162debef250ea9a8078b2ccaba1b442eb0983..47c754e243dab20ae127cd8741c9decb9eea9688 100644 (file)
@@ -5,11 +5,12 @@
 package scheduler
 
 import (
+       "fmt"
        "time"
 
        "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/Sirupsen/logrus"
+       "github.com/sirupsen/logrus"
 )
 
 // sync resolves discrepancies between the queue and the pool:
@@ -33,10 +34,10 @@ func (sch *Scheduler) sync() {
                        logger.WithError(err).Print("error cancelling container")
                }
        }
-       kill := func(ent container.QueueEnt) {
+       kill := func(ent container.QueueEnt, reason string) {
                uuid := ent.Container.UUID
                logger := sch.logger.WithField("ContainerUUID", uuid)
-               logger.Debugf("killing crunch-run process because state=%q", ent.Container.State)
+               logger.Debugf("killing crunch-run process because %s", reason)
                sch.pool.KillContainer(uuid)
        }
        qEntries, qUpdated := sch.queue.Entries()
@@ -48,6 +49,8 @@ func (sch *Scheduler) sync() {
                                go cancel(ent, "not running on any worker")
                        } else if !exited.IsZero() && qUpdated.After(exited) {
                                go cancel(ent, "state=\"Running\" after crunch-run exited")
+                       } else if ent.Container.Priority == 0 {
+                               go kill(ent, fmt.Sprintf("priority=%d", ent.Container.Priority))
                        }
                case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
                        if running {
@@ -59,7 +62,7 @@ func (sch *Scheduler) sync() {
                                // of kill() will be to make the
                                // worker available for the next
                                // container.
-                               go kill(ent)
+                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
                        } else {
                                sch.logger.WithFields(logrus.Fields{
                                        "ContainerUUID": uuid,
@@ -73,7 +76,7 @@ func (sch *Scheduler) sync() {
                                // a network outage and is still
                                // preparing to run a container that
                                // has already been unlocked/requeued.
-                               go kill(ent)
+                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
                        }
                case arvados.ContainerStateLocked:
                        if running && !exited.IsZero() && qUpdated.After(exited) {