From: Tom Clegg Date: Thu, 9 Sep 2021 18:01:59 +0000 (-0400) Subject: 18027: Change default boot probe to `systemctl is-system-running`. X-Git-Tag: 2.3.0~73^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/b169b40808a571663b695d638fb2915625624734 18027: Change default boot probe to `systemctl is-system-running`. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/doc/architecture/dispatchcloud.html.textile.liquid b/doc/architecture/dispatchcloud.html.textile.liquid index e16d987b00..ae854fc2e6 100644 --- a/doc/architecture/dispatchcloud.html.textile.liquid +++ b/doc/architecture/dispatchcloud.html.textile.liquid @@ -68,7 +68,7 @@ The dispatcher and crunch-run programs are both packaged in a single executable h2. Boot probe command -The purpose of the boot probe command is to ensure the dispatcher does not try to schedule containers on an instance before the instance is ready, even if its SSH daemon comes up early in the boot process. The default boot probe command, @docker ps -q@, verifies that the docker daemon is running. It is also common to use a custom startup script in the VM image that writes a file when it finishes, and a boot probe command that checks for that file, such as @cat /var/run/boot.complete@. +The purpose of the boot probe command is to ensure the dispatcher does not try to schedule containers on an instance before the instance is ready, even if its SSH daemon comes up early in the boot process. The default boot probe command, @systemctl is-system-running@, is appropriate for images that use @systemd@ to manage the boot process. Another approach is to use a custom startup script in the VM image that writes a file when it finishes, and a boot probe command that checks for that file, such as @cat /var/run/boot.complete@. h2. Automatic instance shutdown diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index ff48d48f41..2b474ffdb0 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -1072,7 +1072,7 @@ Clusters: # Shell command to execute on each worker to determine whether # the worker is booted and ready to run containers. It should # exit zero if the worker is ready. - BootProbeCommand: "docker ps -q" + BootProbeCommand: "systemctl is-system-running" # Minimum interval between consecutive probes to a single # worker. diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 097396a0ae..2d47addff7 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -1078,7 +1078,7 @@ Clusters: # Shell command to execute on each worker to determine whether # the worker is booted and ready to run containers. It should # exit zero if the worker is ready. - BootProbeCommand: "docker ps -q" + BootProbeCommand: "systemctl is-system-running" # Minimum interval between consecutive probes to a single # worker. diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh index b7d0d0f363..e4a429da6e 100644 --- a/tools/compute-images/scripts/base.sh +++ b/tools/compute-images/scripts/base.sh @@ -115,7 +115,7 @@ $SUDO sed "s/ExecStart=\(.*\)/ExecStart=\1 --default-ulimit nofile=10000:10000 $ $SUDO systemctl daemon-reload # docker should not start on boot: we restart it inside /usr/local/bin/ensure-encrypted-partitions.sh, -# and the BootProbeCommand defaults to "docker ps -q" +# and the BootProbeCommand might be "docker ps -q" $SUDO systemctl disable docker # Make sure user_allow_other is set in fuse.conf diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls index a7784fd7be..f9cf537fb8 100644 --- a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls +++ b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls @@ -106,7 +106,7 @@ arvados: CloudVMs: ResourceTags: Name: __CLUSTER__-compute-node - BootProbeCommand: 'sudo docker ps -q' + BootProbeCommand: 'systemctl is-system-running' ImageID: ami-FIXMEFIXMEFIXMEFI Driver: ec2 DriverParameters: