package container
import (
+ "errors"
"io"
"sync"
"time"
}
apply(avail)
- var missing []string
+ missing := map[string]bool{}
cq.mtx.Lock()
for uuid, ent := range cq.current {
if next[uuid] == nil &&
ent.Container.State != arvados.ContainerStateCancelled &&
ent.Container.State != arvados.ContainerStateComplete {
- missing = append(missing, uuid)
+ missing[uuid] = true
}
}
cq.mtx.Unlock()
- for i, page := 0, 20; i < len(missing); i += page {
- batch := missing[i:]
- if len(batch) > page {
- batch = batch[:page]
+ for len(missing) > 0 {
+ var batch []string
+ for uuid := range missing {
+ batch = append(batch, uuid)
+ if len(batch) == 20 {
+ break
+ }
}
+ filters := []arvados.Filter{{"uuid", "in", batch}}
ended, err := cq.fetchAll(arvados.ResourceListParams{
Select: selectParam,
Order: "uuid",
Count: "none",
- Filters: []arvados.Filter{{"uuid", "in", batch}},
+ Filters: filters,
})
if err != nil {
return nil, err
}
+ if len(ended) == 0 {
+ // This is the only case where we can conclude
+ // a container has been deleted from the
+ // database. A short (but non-zero) page, on
+ // the other hand, can be caused by a response
+ // size limit.
+ for _, uuid := range batch {
+ cq.logger.WithField("ContainerUUID", uuid).Warn("container not found by controller (deleted?)")
+ delete(missing, uuid)
+ cq.mtx.Lock()
+ cq.delEnt(uuid, cq.current[uuid].Container.State)
+ cq.mtx.Unlock()
+ }
+ continue
+ }
apply(ended)
+ for _, ctr := range ended {
+ if _, ok := missing[ctr.UUID]; !ok {
+ msg := "BUG? server response did not match requested filters, erroring out rather than risk deadlock"
+ cq.logger.WithFields(logrus.Fields{
+ "ContainerUUID": ctr.UUID,
+ "Filters": filters,
+ }).Error(msg)
+ return nil, errors.New(msg)
+ }
+ delete(missing, ctr.UUID)
+ }
}
return next, nil
}