defer cq.mtx.Unlock()
ctr := cq.current[uuid].Container
if ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled {
- delete(cq.current, uuid)
+ cq.delEnt(uuid, ctr.State)
}
}
cq.mtx.Lock()
defer cq.mtx.Unlock()
for uuid, ctr := range next {
- if _, keep := cq.dontupdate[uuid]; keep {
+ if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+ // Don't clobber a local update that happened
+ // after we started polling.
continue
}
if cur, ok := cq.current[uuid]; !ok {
cq.current[uuid] = cur
}
}
- for uuid := range cq.current {
- if _, keep := cq.dontupdate[uuid]; keep {
- continue
- } else if _, keep = next[uuid]; keep {
+ for uuid, ent := range cq.current {
+ if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+ // Don't expunge an entry that was
+ // added/updated locally after we started
+ // polling.
continue
- } else {
- delete(cq.current, uuid)
+ } else if _, stillpresent := next[uuid]; !stillpresent {
+ // Expunge an entry that no longer appears in
+ // the poll response (evidently it's
+ // cancelled, completed, deleted, or taken by
+ // a different dispatcher).
+ cq.delEnt(uuid, ent.Container.State)
}
}
cq.dontupdate = nil
return nil
}
+// Caller must have lock.
+func (cq *Queue) delEnt(uuid string, state arvados.ContainerState) {
+ cq.logger.WithFields(logrus.Fields{
+ "ContainerUUID": uuid,
+ "State": state,
+ }).Info("dropping container from queue")
+ delete(cq.current, uuid)
+}
+
func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
it, err := cq.chooseType(&ctr)
if err != nil && (ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked) {
+ // We assume here that any chooseType error is a hard
+ // error: it wouldn't help to try again, or to leave
+ // it for a different dispatcher process to attempt.
errorString := err.Error()
- cq.logger.WithField("ContainerUUID", ctr.UUID).Warn("cancel container with no suitable instance type")
+ logger := cq.logger.WithField("ContainerUUID", ctr.UUID)
+ logger.WithError(err).Warn("cancel container with no suitable instance type")
go func() {
+ if ctr.State == arvados.ContainerStateQueued {
+ // Can't set runtime error without
+ // locking first. If Lock() is
+ // successful, it will call addEnt()
+ // again itself, and we'll fall
+ // through to the
+ // setRuntimeError/Cancel code below.
+ err := cq.Lock(ctr.UUID)
+ if err != nil {
+ logger.WithError(err).Warn("lock failed")
+ // ...and try again on the
+ // next Update, if the problem
+ // still exists.
+ }
+ return
+ }
var err error
defer func() {
if err == nil {
if latest.State == arvados.ContainerStateCancelled {
return
}
- cq.logger.WithField("ContainerUUID", ctr.UUID).WithError(err).Warn("error while trying to cancel unsatisfiable container")
+ logger.WithError(err).Warn("error while trying to cancel unsatisfiable container")
}()
- if ctr.State == arvados.ContainerStateQueued {
- err = cq.Lock(ctr.UUID)
- if err != nil {
- return
- }
- }
err = cq.setRuntimeError(ctr.UUID, errorString)
if err != nil {
return
}()
return
}
+ cq.logger.WithFields(logrus.Fields{
+ "ContainerUUID": ctr.UUID,
+ "State": ctr.State,
+ "Priority": ctr.Priority,
+ "InstanceType": it.Name,
+ }).Info("adding container to queue")
cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
}