projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
19215: Explicitly use bash for bash-ism in snakeoil state
[arvados.git]
/
lib
/
crunchrun
/
docker.go
diff --git
a/lib/crunchrun/docker.go
b/lib/crunchrun/docker.go
index 54d0e680fe6af83494707f58298629db74ded486..bb635265862fff8a4bf36127000297bbad16e051 100644
(file)
--- a/
lib/crunchrun/docker.go
+++ b/
lib/crunchrun/docker.go
@@
-31,6
+31,11
@@
const minDockerRAM = int64(16 * 1024 * 1024)
// https://docs.docker.com/engine/api/.
const DockerAPIVersion = "1.35"
// https://docs.docker.com/engine/api/.
const DockerAPIVersion = "1.35"
+// Number of consecutive "inspect container" failures before
+// concluding Docker is unresponsive, giving up, and cancelling the
+// container.
+const dockerWatchdogThreshold = 3
+
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
@@
-225,17
+230,17
@@
func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
// kill it.
return
} else if err != nil {
// kill it.
return
} else if err != nil {
- e.logf("Error inspecting container: %s", err)
- watchdogErr <- err
- return
+ watchdogErr <- fmt.Errorf("error inspecting container: %s", err)
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
- watchdogErr <- fmt.Errorf("Container is not running: State=%v", ctr.State)
- return
+ watchdogErr <- fmt.Errorf("container is not running: State=%v", ctr.State)
+ } else {
+ watchdogErr <- nil
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
+ errors := 0
for {
select {
case waitBody := <-waitOk:
for {
select {
case waitBody := <-waitOk:
@@
-250,7
+255,16
@@
func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
return -1, ctx.Err()
case err := <-watchdogErr:
return -1, ctx.Err()
case err := <-watchdogErr:
- return -1, err
+ if err == nil {
+ errors = 0
+ } else {
+ e.logf("docker watchdog: %s", err)
+ errors++
+ if errors >= dockerWatchdogThreshold {
+ e.logf("docker watchdog: giving up")
+ return -1, err
+ }
+ }
}
}
}
}
}
}