--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package crunchrun
+
+import (
+ "os/exec"
+)
+
+// nvidiaModprobe makes sure all the nvidia kernel modules and devices
+// are set up. If we don't have all the modules/devices set up, we get
+// "CUDA_ERROR_UNKNOWN".
+func nvidiaModprobe(writer *ThrottledLogger) {
+ // The underlying problem is that when normally running
+ // directly on the host, the CUDA SDK will automatically
+ // detect and set up the devices on demand. However, when
+ // running inside a container, it lacks sufficient permissions
+ // to do that. So, it needs to be set up before the container
+ // can be started.
+ //
+ // The Singularity documentation hints about this but isn't
+ // very helpful with a solution.
+ // https://sylabs.io/guides/3.7/user-guide/gpu.html#cuda-error-unknown-when-everything-seems-to-be-correctly-configured
+ //
+ // If we're running "nvidia-persistenced", it sets up most of
+ // these things on system boot.
+ //
+ // However, that apparently doesn't include /dev/nvidia-uvm.
+ // We're also not guaranteed to be running
+ // "nvidia-persistenced" or otherwise to have the devices set up
+ // for us. So the most robust solution is to do it ourselves.
+ //
+ // These are idempotent operations so it is harmless in the
+ // case that everything was actually already set up.
+
+ // Running nvidia-smi the first time loads the core 'nvidia'
+ // kernel module and creates /dev/nvidiactl and the per-GPU
+ // /dev/nvidia* devices.
+ nvidiaSmi := exec.Command("nvidia-smi", "-L")
+ nvidiaSmi.Stdout = writer
+ nvidiaSmi.Stderr = writer
+ err := nvidiaSmi.Run()
+ if err != nil {
+ writer.Printf("Warning %v: %v", nvidiaSmi.Args, err)
+ }
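+
+ // As a reference point, a successful "nvidia-smi -L" run writes one
+ // line per GPU to the log, e.g. "GPU 0: Tesla T4 (UUID: GPU-...)",
+ // which also confirms the core module and /dev/nvidiactl are in place.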
+
+ // Load the kernel modules & devices associated with
+ // /dev/nvidia-modeset, /dev/nvidia-nvlink, /dev/nvidia-uvm
+ // and /dev/nvidia-uvm-tools (-m, -l and -u). Annoyingly,
+ // these don't have multiple devices but you need to supply
+ // "-c0" anyway or it won't make the device file.
+
+ // Nvswitch devices are multi-GPU interconnects for up to 16
+ // GPUs. The "-c0 -s" flags will create /dev/nvidia-nvswitch0.
+ // If someone runs Arvados on a system with multiple
+ // nvswitches (i.e. more than 16 GPUs) they'll have to ensure
+ // that all the /dev/nvidia-nvswitch* devices exist before
+ // crunch-run starts.
+ for _, opt := range []string{"-m", "-l", "-u", "-s"} {
+ nvmodprobe := exec.Command("nvidia-modprobe", "-c0", opt)
+ nvmodprobe.Stdout = writer
+ nvmodprobe.Stderr = writer
+ err = nvmodprobe.Run()
+ if err != nil {
+ writer.Printf("Warning %v: %v", nvmodprobe.Args, err)
+ }
+ }
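+
+ // At this point, assuming the commands above succeeded, the device
+ // nodes /dev/nvidiactl, /dev/nvidia* (per GPU), /dev/nvidia-modeset,
+ // /dev/nvidia-nvlink, /dev/nvidia-uvm, /dev/nvidia-uvm-tools and
+ // /dev/nvidia-nvswitch0 should all exist.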
+}
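+
+// Example caller, as a sketch only (the actual invocation point and
+// field names in crunch-run may differ): the executor would run
+// nvidiaModprobe before starting any container that requests CUDA
+// devices, e.g.
+//
+//	if containerRequestsCUDA() {
+//		nvidiaModprobe(runner.CrunchLog)
+//	}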
},
}
if spec.CUDADeviceCount != 0 {
+ var deviceIds []string
+ if cudaVisibleDevices := os.Getenv("CUDA_VISIBLE_DEVICES"); cudaVisibleDevices != "" {
+ // If a resource manager such as slurm or LSF told
+ // us to select specific devices we need to propagate that.
+ deviceIds = strings.Split(cudaVisibleDevices, ",")
+ }
+
+ deviceCount := spec.CUDADeviceCount
+ if len(deviceIds) > 0 {
+ // Docker won't accept both non-empty
+ // DeviceIDs and a non-zero Count
+ //
+ // (it turns out "Count" is a dumb fallback
+ // that just allocates device 0, 1, 2, ...,
+ // Count-1)
+ deviceCount = 0
+ }
+
+ // Capabilities are confusing. The driver has generic
+ // capabilities "gpu" and "nvidia", but then there are
+ // additional capabilities "compute" and "utility"
+ // that are passed to nvidia-container-cli.
+ //
+ // "compute" means include the CUDA libraries and
+ // "utility" means include the CUDA utility programs
+ // (like nvidia-smi).
+ //
+ // https://github.com/moby/moby/blob/7b9275c0da707b030e62c96b679a976f31f929d3/daemon/nvidia_linux.go#L37
+ // https://github.com/containerd/containerd/blob/main/contrib/nvidia/nvidia.go
hostCfg.Resources.DeviceRequests = append(hostCfg.Resources.DeviceRequests, dockercontainer.DeviceRequest{
Driver: "nvidia",
- Count: spec.CUDADeviceCount,
- Capabilities: [][]string{[]string{"gpu", "nvidia", "compute"}},
+ Count: deviceCount,
+ DeviceIDs: deviceIds,
+ Capabilities: [][]string{[]string{"gpu", "nvidia", "compute", "utility"}},
})
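+ // For example, if the dispatcher set CUDA_VISIBLE_DEVICES="1,3",
+ // the request above is effectively DeviceRequest{Driver: "nvidia",
+ // Count: 0, DeviceIDs: []string{"1", "3"}, ...} and Docker exposes
+ // only GPUs 1 and 3 to the container.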
}
for path, mount := range spec.BindMounts {
env = append(env, "SINGULARITYENV_"+k+"="+v)
}
+ // Singularity always makes all nvidia devices visible to the
+ // container. If a resource manager such as slurm or LSF told
+ // us to select specific devices we need to propagate that.
+ if cudaVisibleDevices := os.Getenv("CUDA_VISIBLE_DEVICES"); cudaVisibleDevices != "" {
+ env = append(env, "SINGULARITYENV_CUDA_VISIBLE_DEVICES="+cudaVisibleDevices)
+ }
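+ // For example, CUDA_VISIBLE_DEVICES="1,3" on the host becomes
+ // SINGULARITYENV_CUDA_VISIBLE_DEVICES=1,3 here, which Singularity
+ // exports inside the container as CUDA_VISIBLE_DEVICES=1,3, so the
+ // CUDA runtime in the container only uses the assigned GPUs.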
+
args = append(args, e.imageFilename)
args = append(args, e.spec.Command...)