package crunchrun
import (
+ "context"
"fmt"
"io"
"io/ioutil"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
- "golang.org/x/net/context"
)
// minDockerRAM is 16 MiB (16 * 1024 * 1024), typed as int64.
//
// Docker daemon won't let you set a limit less than ~10 MiB
//
// NOTE(review): presumably used as a floor for the container memory
// limit; the use site is not visible here -- confirm.
const minDockerRAM = int64(16 * 1024 * 1024)
+// DockerAPIVersion is the API version we use to communicate with the
+// docker service. The oldest OS we support is Ubuntu 18.04 (bionic)
+// which originally shipped docker 17.12 / API 1.35 so there is no
+// reason to use an older API version. See
+// https://dev.arvados.org/issues/15370#note-38 and
+// https://docs.docker.com/engine/api/.
+const DockerAPIVersion = "1.35"
+
+// Number of consecutive "inspect container" failures before
+// concluding Docker is unresponsive, giving up, and cancelling the
+// container.
+const dockerWatchdogThreshold = 5
+
type dockerExecutor struct {
containerUUID string
logf func(string, ...interface{})
func newDockerExecutor(containerUUID string, logf func(string, ...interface{}), watchdogInterval time.Duration) (*dockerExecutor, error) {
// Talk to the Docker daemon using DockerAPIVersion; see that
// constant's doc comment for why this version was chosen.
- client, err := dockerclient.NewClient(dockerclient.DefaultDockerHost, "1.21", nil, nil)
+ client, err := dockerclient.NewClient(dockerclient.DefaultDockerHost, DockerAPIVersion, nil, nil)
if watchdogInterval < 1 {
- watchdogInterval = time.Minute
+ watchdogInterval = time.Minute * 2
}
return &dockerExecutor{
containerUUID: containerUUID,
// Create builds the container config and host config from spec via
// e.config, then asks the Docker daemon to create a container,
// passing e.containerUUID as the final (container name) argument.
//
// If the daemon reports an error, it is returned prefixed with
// "While creating container: ". Otherwise the result of e.startIO,
// called with the spec's Stdin, Stdout and Stderr, is returned.
func (e *dockerExecutor) Create(spec containerSpec) error {
cfg, hostCfg := e.config(spec)
- created, err := e.dockerclient.ContainerCreate(context.TODO(), &cfg, &hostCfg, nil, e.containerUUID)
+ created, err := e.dockerclient.ContainerCreate(context.TODO(), &cfg, &hostCfg, nil, nil, e.containerUUID)
if err != nil {
return fmt.Errorf("While creating container: %v", err)
}
// NOTE(review): created is not referenced in the lines visible here;
// presumably its ID is recorded in lines elided from this view -- confirm.
return e.startIO(spec.Stdin, spec.Stdout, spec.Stderr)
}
-func (e *dockerExecutor) CgroupID() string {
- return e.containerID
// Pid inspects the container identified by e.containerID, using a
// context that expires 10 seconds from now, and returns the PID found
// in the inspected container state.
//
// It returns 0 if the inspect call fails or if the response carries
// no state.
+func (e *dockerExecutor) Pid() int {
+ ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(10*time.Second))
+ defer cancel()
+ ctr, err := e.dockerclient.ContainerInspect(ctx, e.containerID)
+ if err == nil && ctr.State != nil {
+ return ctr.State.Pid
+ } else {
+ return 0
+ }
}
func (e *dockerExecutor) Start() error {
// kill it.
return
} else if err != nil {
- e.logf("Error inspecting container: %s", err)
- watchdogErr <- err
- return
+ watchdogErr <- fmt.Errorf("error inspecting container: %s", err)
} else if ctr.State == nil || !(ctr.State.Running || ctr.State.Status == "created") {
- watchdogErr <- fmt.Errorf("Container is not running: State=%v", ctr.State)
- return
+ watchdogErr <- fmt.Errorf("container is not running: State=%v", ctr.State)
+ } else {
+ watchdogErr <- nil
}
}
}()
waitOk, waitErr := e.dockerclient.ContainerWait(ctx, e.containerID, dockercontainer.WaitConditionNotRunning)
+ errors := 0
for {
select {
case waitBody := <-waitOk:
return -1, ctx.Err()
case err := <-watchdogErr:
- return -1, err
+ if err == nil {
+ errors = 0
+ } else {
+ e.logf("docker watchdog: %s", err)
+ errors++
+ if errors >= dockerWatchdogThreshold {
+ e.logf("docker watchdog: giving up")
+ return -1, err
+ }
+ }
}
}
}