12630: Remove distinction between cubin/PTX hardware capabilities
author     Peter Amstutz <peter.amstutz@curii.com>
           Wed, 24 Nov 2021 20:26:46 +0000 (15:26 -0500)
committer  Peter Amstutz <peter.amstutz@curii.com>
           Fri, 10 Dec 2021 16:24:54 +0000 (11:24 -0500)
Fix misspelled "HardwareCapability".

Add cuda_ fields to runtime_constraints documentation.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

doc/_includes/_container_runtime_constraints.liquid
lib/crunchrun/crunchrun.go
sdk/go/arvados/container.go

index 7e0c8f18f27910cd146742467a7d38feed39b428..dcdc29cf3f26646d80de501a0431e946d177f431 100644 (file)
@@ -14,3 +14,6 @@ table(table table-bordered table-condensed).
 |vcpus|integer|Number of cores to be used to run this process.|Optional. However, a ContainerRequest that is in "Committed" state must provide this.|
 |keep_cache_ram|integer|Number of keep cache bytes to be used to run this process.|Optional.|
 |API|boolean|When set, ARVADOS_API_HOST and ARVADOS_API_TOKEN will be set, and container will have networking enabled to access the Arvados API server.|Optional.|
+|cuda_driver_version|string|Minimum CUDA driver version.|Optional.|
+|cuda_hardware_capability|string|Minimum CUDA hardware capability.|Optional.|
+|cuda_device_count|integer|Number of GPUs to request.|Optional.|
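
For reference, a minimal sketch of setting these constraints from the Go SDK and emitting the corresponding JSON. The struct fields and tags come from sdk/go/arvados/container.go (changed below); the import path and the concrete values are illustrative assumptions, not part of this commit.

package main

import (
	"encoding/json"
	"fmt"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	// Illustrative values only: request one GPU, a driver of at least
	// 11.0, and hardware capability of at least 8.0.
	rc := arvados.RuntimeConstraints{
		VCPUs:                  1,
		RAM:                    1 << 30,
		CUDADriverVersion:      "11.0",
		CUDAHardwareCapability: "8.0",
		CUDADeviceCount:        1,
	}
	buf, _ := json.MarshalIndent(rc, "", "  ")
	fmt.Println(string(buf)) // emits the cuda_* keys documented above
}
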
index 589a046a34c0edb94ed97c48b7776cd63c1091cd..7e68dcd3314685bf4132739bb17ff0ad2e9ba7b8 100644 (file)
@@ -986,6 +986,18 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st
        runner.executorStdin = stdin
        runner.executorStdout = stdout
        runner.executorStderr = stderr
+
+       cudaDeviceCount := 0
+       if runner.Container.RuntimeConstraints.CUDADriverVersion != "" ||
+               runner.Container.RuntimeConstraints.CUDAHardwareCapability != "" ||
+               runner.Container.RuntimeConstraints.CUDADeviceCount != 0 {
+               // if any of these are set, enable CUDA GPU support
+               cudaDeviceCount = runner.Container.RuntimeConstraints.CUDADeviceCount
+               if cudaDeviceCount == 0 {
+                       cudaDeviceCount = 1
+               }
+       }
+
        return runner.executor.Create(containerSpec{
                Image:           imageID,
                VCPUs:           runner.Container.RuntimeConstraints.VCPUs,
@@ -995,7 +1007,7 @@ func (runner *ContainerRunner) CreateContainer(imageID string, bindmounts map[st
                BindMounts:      bindmounts,
                Command:         runner.Container.Command,
                EnableNetwork:   enableNetwork,
-               CUDADeviceCount: runner.Container.RuntimeConstraints.CUDADeviceCount,
+               CUDADeviceCount: cudaDeviceCount,
                NetworkMode:     runner.networkMode,
                CgroupParent:    runner.setCgroupParent,
                Stdin:           stdin,
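
The block added above amounts to a defaulting rule: any non-empty CUDA constraint enables GPU support, and an unspecified device count then defaults to one. Restated as a standalone sketch (effectiveCUDADeviceCount is a hypothetical helper, not part of this commit):

// effectiveCUDADeviceCount mirrors the logic added above: if any CUDA
// constraint is set, GPU support is enabled and an unspecified device
// count defaults to 1; otherwise no GPUs are requested.
func effectiveCUDADeviceCount(driverVersion, hardwareCapability string, deviceCount int) int {
	if driverVersion == "" && hardwareCapability == "" && deviceCount == 0 {
		return 0
	}
	if deviceCount == 0 {
		return 1
	}
	return deviceCount
}
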
index 8e335f875ce71d529a37bee859ed75477ed64195..27afc1a3abbb31f380a20fdfb6110a5300168afb 100644 (file)
@@ -96,14 +96,13 @@ type Mount struct {
 // RuntimeConstraints specify a container's compute resources (RAM,
 // CPU) and network connectivity.
 type RuntimeConstraints struct {
-       API                         bool     `json:"API"`
-       RAM                         int64    `json:"ram"`
-       VCPUs                       int      `json:"vcpus"`
-       KeepCacheRAM                int64    `json:"keep_cache_ram"`
-       CUDADriverVersion           string   `json:"cuda_driver_version,omitempty"`
-       CUDACubinHardwareCapability []string `json:"cuda_cubin_hardware_capability,omitempty"`
-       CUDAPTXHardwardCapability   string   `json:"cuda_ptx_hardware_capability,omitempty"`
-       CUDADeviceCount             int      `json:"cuda_device_count,omitempty"`
+       API                    bool   `json:"API"`
+       RAM                    int64  `json:"ram"`
+       VCPUs                  int    `json:"vcpus"`
+       KeepCacheRAM           int64  `json:"keep_cache_ram"`
+       CUDADriverVersion      string `json:"cuda_driver_version,omitempty"`
+       CUDAHardwareCapability string `json:"cuda_hardware_capability,omitempty"`
+       CUDADeviceCount        int    `json:"cuda_device_count,omitempty"`
 }
 
 // SchedulingParameters specify a container's scheduling parameters
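
A minimal sketch of decoding a runtime_constraints document against the consolidated field. Because encoding/json ignores unknown keys, a document that still carries the removed cuda_cubin_/cuda_ptx_ keys decodes without error and only the new field is populated; the example document and values are assumptions for illustration.

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	// A document that mixes the consolidated field with a leftover
	// legacy key; the legacy key is simply dropped on decode.
	doc := []byte(`{
		"vcpus": 2,
		"ram": 2147483648,
		"cuda_hardware_capability": "8.0",
		"cuda_ptx_hardware_capability": "7.5"
	}`)
	var rc arvados.RuntimeConstraints
	if err := json.Unmarshal(doc, &rc); err != nil {
		log.Fatal(err)
	}
	fmt.Println(rc.CUDAHardwareCapability, rc.CUDADeviceCount) // "8.0 0"
}
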