X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2666c3b726d190511f072db9e6606a1a99936968..0a6c326074bdba18a13428f4580a313a6b5d7687:/lib/dispatchcloud/scheduler/fix_stale_locks.go diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go index 264f9e4ec6..dbd8b609a9 100644 --- a/lib/dispatchcloud/scheduler/fix_stale_locks.go +++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go @@ -7,8 +7,8 @@ package scheduler import ( "time" - "git.curoverse.com/arvados.git/lib/dispatchcloud/worker" - "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/lib/dispatchcloud/worker" + "git.arvados.org/arvados.git/sdk/go/arvados" ) // fixStaleLocks waits for any already-locked containers (i.e., locked @@ -19,24 +19,15 @@ import ( func (sch *Scheduler) fixStaleLocks() { wp := sch.pool.Subscribe() defer sch.pool.Unsubscribe(wp) + + var stale []string timeout := time.NewTimer(sch.staleLockTimeout) waiting: - for { - unlock := false - select { - case <-wp: - // If all workers have been contacted, unlock - // containers that aren't claimed by any - // worker. - unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0 - case <-timeout.C: - // Give up and unlock the containers, even - // though they might be working. - unlock = true - } - + for sch.pool.CountWorkers()[worker.StateUnknown] > 0 { running := sch.pool.Running() qEntries, _ := sch.queue.Entries() + + stale = nil for uuid, ent := range qEntries { if ent.Container.State != arvados.ContainerStateLocked { continue @@ -44,14 +35,24 @@ waiting: if _, running := running[uuid]; running { continue } - if !unlock { - continue waiting - } - err := sch.queue.Unlock(uuid) - if err != nil { - sch.logger.Warnf("Unlock %s: %s", uuid, err) - } + stale = append(stale, uuid) + } + if len(stale) == 0 { + return + } + + select { + case <-wp: + case <-timeout.C: + // Give up. + break waiting + } + } + + for _, uuid := range stale { + err := sch.queue.Unlock(uuid) + if err != nil { + sch.logger.Warnf("Unlock %s: %s", uuid, err) } - return } }