12630: Call nvidia-modprobe, support CUDA_VISIBLE_DEVICES
author    Peter Amstutz <peter.amstutz@curii.com>
Fri, 7 Jan 2022 22:13:09 +0000 (17:13 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Fri, 7 Jan 2022 22:16:44 +0000 (17:16 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

lib/crunchrun/crunchrun.go
lib/crunchrun/cuda.go [new file with mode: 0644]
lib/crunchrun/docker.go
lib/crunchrun/singularity.go

diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index b237d9fa590cef847366be35a4b728e1442e7941..fb2200a56b8760e5e2e655ce82aacb8fc2d56815 100644
@@ -987,6 +987,10 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st
        runner.executorStdout = stdout
        runner.executorStderr = stderr
 
+       if runner.Container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+               nvidiaModprobe(runner.CrunchLog)
+       }
+
        return runner.executor.Create(containerSpec{
                Image:           imageID,
                VCPUs:           runner.Container.RuntimeConstraints.VCPUs,
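
A minimal sketch of the guard added above, using a hypothetical stand-in type (the real field lives on the container's RuntimeConstraints): host-side GPU setup happens before executor.Create, and only when the container actually requests CUDA devices.

    package main

    import (
        "fmt"
        "io"
        "os"
    )

    // cudaConstraints stands in for the CUDA section of the container's
    // runtime constraints referenced in the hunk above.
    type cudaConstraints struct {
        DeviceCount int
    }

    // maybeSetUpGPUs mirrors the guard in CreateContainer: skip host-side
    // device setup entirely for non-CUDA containers.
    func maybeSetUpGPUs(c cudaConstraints, log io.Writer) {
        if c.DeviceCount > 0 {
            fmt.Fprintln(log, "would call nvidiaModprobe(log) here")
        }
    }

    func main() {
        maybeSetUpGPUs(cudaConstraints{DeviceCount: 1}, os.Stdout) // logs the setup step
        maybeSetUpGPUs(cudaConstraints{}, os.Stdout)               // no-op
    }
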
diff --git a/lib/crunchrun/cuda.go b/lib/crunchrun/cuda.go
new file mode 100644
index 0000000..91949c5
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package crunchrun
+
+import (
+       "io"
+       "os/exec"
+)
+
+// nvidiaModprobe makes sure all the nvidia kernel modules and devices
+// are set up.  If we don't have all the modules/devices set up, we get
+// "CUDA_ERROR_UNKNOWN".
+func nvidiaModprobe(writer io.Writer) {
+       // The underlying problem is that when normally running
+       // directly on the host, the CUDA SDK will automatically
+       // detect and set up the devices on demand.  However, when
+       // running inside a container, it lacks sufficient permissions
+       // to do that.  So, it needs to be set up before the container
+       // can be started.
+       //
+       // The Singularity documentation hints about this but isn't
+       // very helpful with a solution.
+       // https://sylabs.io/guides/3.7/user-guide/gpu.html#cuda-error-unknown-when-everything-seems-to-be-correctly-configured
+       //
+       // If we're running "nvidia-persistenced", it sets up most of
+       // these things on system boot.
+       //
+       // However, it seems that doesn't include /dev/nvidia-uvm.
+       // We're also not guaranteed to be running
+       // "nvidia-persistenced" or otherwise have the devices set up
+       // for us.  So the most robust solution is to do it ourselves.
+       //
+       // These are idempotent operations so it is harmless in the
+       // case that everything was actually already set up.
+
+       // Running nvidia-smi the first time loads the core 'nvidia'
+       // kernel module and creates /dev/nvidiactl and the per-GPU
+       // /dev/nvidia* devices.
+       nvidiaSmi := exec.Command("nvidia-smi", "-L")
+       nvidiaSmi.Stdout = writer
+       nvidiaSmi.Stderr = writer
+       nvidiaSmi.Run()
+
+       // Load the kernel modules & devices associated with
+       // /dev/nvidia-modeset, /dev/nvidia-nvlink, /dev/nvidia-uvm
+       // and /dev/nvidia-uvm-tools (-m, -l and -u).  Annoyingly,
+       // these don't have multiple devices, but you need to supply
+       // "-c0" anyway or it won't create the device file.
+       exec.Command("nvidia-modprobe", "-c0", "-m").Run()
+       exec.Command("nvidia-modprobe", "-c0", "-l").Run()
+       exec.Command("nvidia-modprobe", "-c0", "-u").Run()
+
+       // Nvswitch devices are multi-GPU interconnects for up to 16
+       // GPUs.  Here we'll create /dev/nvidia-nvswitch0.  If someone
+       // runs Arvados on a system with multiple nvswitches
+       // (i.e. more than 16 GPUs) they can either ensure that the
+       // additional /dev/nvidia-nvswitch* devices exist before
+       // crunch-run starts or pay for support (because they clearly
+       // have the budget for it).
+       exec.Command("nvidia-modprobe", "-c0", "-s").Run()
+}
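
A hedged usage sketch, not part of this commit: nvidiaModprobe only needs an io.Writer for the "nvidia-smi -L" output and deliberately ignores every command error, so a hypothetical test in package crunchrun could exercise it like this (on a host without the NVIDIA userland tools it simply produces no output).

    package crunchrun

    import (
        "bytes"
        "testing"
    )

    // TestNvidiaModprobeSketch is a hypothetical test, not part of the
    // commit: it checks that the best-effort setup never panics and
    // surfaces whatever "nvidia-smi -L" printed.
    func TestNvidiaModprobeSketch(t *testing.T) {
        var buf bytes.Buffer
        nvidiaModprobe(&buf)
        t.Logf("nvidia-smi -L reported: %q", buf.String())
    }
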
diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go
index ab00273ef3a8eda38fccfc67b2a6e0b3c00659d3..f437d6a0c39d6c25d42fec9b5d312ca73a01d5d9 100644
@@ -107,10 +107,26 @@ func (e *dockerExecutor) config(spec containerSpec) (dockercontainer.Config, doc
                },
        }
        if spec.CUDADeviceCount != 0 {
+               var deviceIds []string
+               for _, s := range os.Environ() {
+                       // If a resource manager such as slurm or LSF told
+                       // us to select specific devices, we need to propagate that.
+                       if strings.HasPrefix(s, "CUDA_VISIBLE_DEVICES=") {
+                               deviceIds = strings.Split(strings.SplitN(s, "=", 2)[1], ",")
+                       }
+               }
+               deviceCount := spec.CUDADeviceCount
+               if len(deviceIds) > 0 {
+                       // Docker won't accept both non-empty
+                       // DeviceIDs and a non-zero Count
+                       deviceCount = 0
+               }
+
                hostCfg.Resources.DeviceRequests = append(hostCfg.Resources.DeviceRequests, dockercontainer.DeviceRequest{
                        Driver:       "nvidia",
-                       Count:        spec.CUDADeviceCount,
-                       Capabilities: [][]string{[]string{"gpu", "nvidia", "compute"}},
+                       Count:        deviceCount,
+                       DeviceIDs:    deviceIds,
+                       Capabilities: [][]string{[]string{"gpu", "nvidia"}},
                })
        }
        for path, mount := range spec.BindMounts {
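
A standalone sketch of the device-selection logic above (the type and function names here are illustrative stand-ins, not the real dockercontainer API): IDs taken from CUDA_VISIBLE_DEVICES win over a plain device count, and the count is then zeroed because Docker rejects a DeviceRequest with both non-empty DeviceIDs and a non-zero Count.

    package main

    import (
        "fmt"
        "os"
        "strings"
    )

    // deviceRequest is an illustrative stand-in for the two fields of
    // dockercontainer.DeviceRequest that the hunk above populates.
    type deviceRequest struct {
        Count     int
        DeviceIDs []string
    }

    // cudaDeviceRequest mirrors the selection logic: explicit IDs set by a
    // resource manager (slurm/LSF) via CUDA_VISIBLE_DEVICES override the
    // requested count, which is then zeroed to satisfy Docker.
    func cudaDeviceRequest(requestedCount int) deviceRequest {
        var deviceIds []string
        if v := os.Getenv("CUDA_VISIBLE_DEVICES"); v != "" {
            deviceIds = strings.Split(v, ",")
        }
        if len(deviceIds) > 0 {
            requestedCount = 0
        }
        return deviceRequest{Count: requestedCount, DeviceIDs: deviceIds}
    }

    func main() {
        os.Setenv("CUDA_VISIBLE_DEVICES", "1,3")
        fmt.Printf("%+v\n", cudaDeviceRequest(2)) // {Count:0 DeviceIDs:[1 3]}
    }
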
diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go
index cda10aa611b7cf8d3d1d03822495f91a9ee0f8d6..942de4300e087a95306fc2de1d20288535581a67 100644
@@ -10,6 +10,7 @@ import (
        "os"
        "os/exec"
        "sort"
+       "strings"
        "syscall"
        "time"
 
@@ -284,6 +285,15 @@ func (e *singularityExecutor) execCmd(path string) *exec.Cmd {
                env = append(env, "SINGULARITYENV_"+k+"="+v)
        }
 
+       // Singularity always makes all nvidia devices visible to the
+       // container.  If a resource manager such as slurm or LSF told
+       // us to select specific devices, we need to propagate that.
+       for _, s := range os.Environ() {
+               if strings.HasPrefix(s, "CUDA_VISIBLE_DEVICES=") {
+                       env = append(env, "SINGULARITYENV_"+s)
+               }
+       }
+
        args = append(args, e.imageFilename)
        args = append(args, e.spec.Command...)
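
For context, a small sketch of what the environment pass-through above produces (illustrative only, not part of the commit): with CUDA_VISIBLE_DEVICES set by the resource manager, the container receives a SINGULARITYENV_-prefixed copy, which Singularity strips back to CUDA_VISIBLE_DEVICES inside the container.

    package main

    import (
        "fmt"
        "os"
        "strings"
    )

    func main() {
        // Simulate a resource manager restricting crunch-run to GPUs 0 and 2.
        os.Setenv("CUDA_VISIBLE_DEVICES", "0,2")

        var env []string
        for _, s := range os.Environ() {
            if strings.HasPrefix(s, "CUDA_VISIBLE_DEVICES=") {
                // Same prefixing as the executor above: singularity copies
                // SINGULARITYENV_* variables into the container environment
                // with the prefix removed.
                env = append(env, "SINGULARITYENV_"+s)
            }
        }
        fmt.Println(env) // [SINGULARITYENV_CUDA_VISIBLE_DEVICES=0,2]
    }
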