+ log.Printf("Error getting final container state: %v", err)
+ }
+
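+ // The container is recorded as Locked or Running but has no
+ // corresponding job in the slurm queue: revert Locked to Queued so
+ // the container can be rescheduled, and mark Running as Cancelled.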
+ var st arvados.ContainerState
+ switch con.State {
+ case dispatch.Locked:
+ st = dispatch.Queued
+ case dispatch.Running:
+ st = dispatch.Cancelled
+ default:
+ // Container state is Queued, Complete or Cancelled, so stop monitoring it.
+ return
+ }
+
+ log.Printf("Container %s in state %v but missing from slurm queue, changing to %v.",
+ container.UUID, con.State, st)
+ dispatcher.UpdateState(container.UUID, st)
+ }
+ }
+}
+
+// run submits or monitors a container.
+//
+// It watches status updates from the dispatcher; if the priority
+// changes to zero, it cancels the container using scancel.
+func run(dispatcher *dispatch.Dispatcher,
+ container arvados.Container,
+ status chan arvados.Container) {
+
+ log.Printf("Monitoring container %v started", container.UUID)
+ defer log.Printf("Monitoring container %v finished", container.UUID)
+
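+ // monitorDone signals monitorSubmitOrCancel to stop watching this
+ // container.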
+ monitorDone := false
+ go monitorSubmitOrCancel(dispatcher, container, &monitorDone)
+
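+ // Consume status updates until the dispatcher closes the channel.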
+ for container = range status {
+ if container.State == dispatch.Locked || container.State == dispatch.Running {
+ if container.Priority == 0 {
+ log.Printf("Canceling container %s", container.UUID)
+
+ // Mutex between squeue sync and running sbatch or scancel.
+ squeueUpdater.SlurmLock.Lock()
+ err := scancelCmd(container).Run()
+ squeueUpdater.SlurmLock.Unlock()
+
+ if err != nil {
+ log.Printf("Error stopping container %s with scancel: %v",
+ container.UUID, err)
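+ // If the job is still visible in squeue, keep monitoring
+ // and retry the cancellation on the next status update.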
+ if squeueUpdater.CheckSqueue(container.UUID) {
+ log.Printf("Container %s is still in squeue after scancel.",
+ container.UUID)
+ continue
+ }