projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
20319: Move /containers/*/log to /container_requests/*/log.
[arvados.git]
/
lib
/
crunchrun
/
docker.go
diff --git
a/lib/crunchrun/docker.go
b/lib/crunchrun/docker.go
index 54d0e680fe6af83494707f58298629db74ded486..8d8cdfc8ba620a4317d4f48bf0f654c04cf58bef 100644
(file)
--- a/
lib/crunchrun/docker.go
+++ b/
lib/crunchrun/docker.go
@@
-4,6
+4,7
@@
package crunchrun
import (
package crunchrun
import (
+ "context"
"fmt"
"io"
"io/ioutil"
"fmt"
"io"
"io/ioutil"
@@
-17,7
+18,6
@@
import (
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
- "golang.org/x/net/context"
)
// Docker daemon won't let you set a limit less than ~10 MiB
)
// Docker daemon won't let you set a limit less than ~10 MiB
@@
-31,6
+31,11
@@
const minDockerRAM = int64(16 * 1024 * 1024)
// https://docs.docker.com/engine/api/.
const DockerAPIVersion = "1.35"
// https://docs.docker.com/engine/api/.
const DockerAPIVersion = "1.35"
+// Number of consecutive "inspect container" failures before
+// concluding Docker is unresponsive, giving up, and cancelling the
+// container.
+const dockerWatchdogThreshold = 3
+
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
@@
-225,17
+230,17
@@
func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
// kill it.
return
} else if err != nil {
// kill it.
return
} else if err != nil {
- e.logf("Error inspecting container: %s", err)
- watchdogErr <- err
- return
+ watchdogErr <- fmt.Errorf("error inspecting container: %s", err)
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
- watchdogErr <- fmt.Errorf("Container is not running: State=%v", ctr.State)
- return
+ watchdogErr <- fmt.Errorf("container is not running: State=%v", ctr.State)
+ } else {
+ watchdogErr <- nil
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
+ errors := 0
for {
select {
case waitBody := <-waitOk:
for {
select {
case waitBody := <-waitOk:
@@
-250,7
+255,16
@@
func (e *dockerExecutor) Wait(ctx context.Context) (int, error) {
return -1, ctx.Err()
case err := <-watchdogErr:
return -1, ctx.Err()
case err := <-watchdogErr:
- return -1, err
+ if err == nil {
+ errors = 0
+ } else {
+ e.logf("docker watchdog: %s", err)
+ errors++
+ if errors >= dockerWatchdogThreshold {
+ e.logf("docker watchdog: giving up")
+ return -1, err
+ }
+ }
}
}
}
}
}
}