1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
10 "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
11 "git.curoverse.com/arvados.git/sdk/go/arvados"
12 "github.com/Sirupsen/logrus"
15 // FixStaleLocks waits for any already-locked containers (i.e., locked
16 // by a prior dispatcher process) to appear on workers as the worker
17 // pool recovers its state. It unlocks any that still remain when all
18 // workers are recovered or shut down, or its timer expires.
19 func FixStaleLocks(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, limit time.Duration) {
20 wp := pool.Subscribe()
21 defer pool.Unsubscribe(wp)
22 timeout := time.NewTimer(limit)
28 // If all workers have been contacted, unlock
29 // containers that aren't claimed by any
31 unlock = pool.Workers()[worker.StateUnknown] == 0
33 // Give up and unlock the containers, even
34 // though they might be working.
38 running := pool.Running()
39 qEntries, _ := queue.Entries()
40 for uuid, ent := range qEntries {
41 if ent.Container.State != arvados.ContainerStateLocked {
44 if _, running := running[uuid]; running {
50 err := queue.Unlock(uuid)
52 logger.Warnf("Unlock %s: %s", uuid, err)