18713: disable the nvidia-persistenced service in the compute image.
author Ward Vandewege <ward@curii.com>
Thu, 3 Feb 2022 18:56:53 +0000 (13:56 -0500)
committer Ward Vandewege <ward@curii.com>
Thu, 3 Feb 2022 18:56:53 +0000 (13:56 -0500)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

tools/compute-images/scripts/base.sh

index 8ea25087826fe2d2cee9596d60621363de81ca99..b30ad38d16693654bae65ea167f75255c36c2ff4 100644 (file)
@@ -207,6 +207,11 @@ if [ "$NVIDIA_GPU_SUPPORT" == "1" ]; then
   fi
   $SUDO apt-get update
   $SUDO apt-get -y install libnvidia-container1 libnvidia-container-tools nvidia-container-toolkit
+  # This service fails to start when the image is booted without Nvidia GPUs present, which makes
+  # `systemctl is-system-running` respond with "degraded". Because that command is our default
+  # BootProbeCommand, compute nodes then never finish booting from Arvados' perspective.
+  # Disable the service to avoid this.
+  $SUDO systemctl disable nvidia-persistenced.service
 fi
 
 $SUDO apt-get clean