12630: first try at adding GPU support, no tests yet

author Peter Amstutz <peter.amstutz@curii.com>

Mon, 22 Nov 2021 17:47:03 +0000 (12:47 -0500)

committer Peter Amstutz <peter.amstutz@curii.com>

Fri, 10 Dec 2021 16:24:50 +0000 (11:24 -0500)
author Peter Amstutz <peter.amstutz@curii.com>
Mon, 22 Nov 2021 17:47:03 +0000 (12:47 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Fri, 10 Dec 2021 16:24:50 +0000 (11:24 -0500)
diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go

index 07f79bbcc2d11f0239a6231288a94d84a89f87fb..573df7faf0b66ac0fa3ac97195a14b8ed5cde0cc 100644 (file)
--- a/lib/crunchrun/docker.go
+++ b/lib/crunchrun/docker.go
@@ -106,6 +106,13 @@ func (e *dockerExecutor) Create(spec containerSpec) error {
                         KernelMemory: spec.RAM, // kernel portion
                 },
         }
+       if spec.EnableCUDA {
+               hostCfg.Resources.DeviceRequests = append(hostCfg.Resources.DeviceRequests, dockercontainer.DeviceRequest{
+                       Driver:       "nvidia",
+                       Count:        -1,
+                       Capabilities: [][]string{[]string{"gpu", "nvidia", "compute"}},
+               })
+       }
         for path, mount := range spec.BindMounts {
                 bind := mount.HostPath + ":" + path
                 if mount.ReadOnly {
diff --git a/lib/crunchrun/executor.go b/lib/crunchrun/executor.go

index b7c341f3186b1af319780d19777d892be082a1cc..bffd701bcd4d0246841a2dfbb4e2d9cde5b9c2ab 100644 (file)
--- a/lib/crunchrun/executor.go
+++ b/lib/crunchrun/executor.go
@@ -24,6 +24,7 @@ type containerSpec struct {
         BindMounts    map[string]bindmount
         Command       []string
         EnableNetwork bool
+       EnableCUDA    bool
         NetworkMode   string // docker network mode, normally "default"
         CgroupParent  string
         Stdin         io.Reader
diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go

index 5af023a83dc2dc61506818c88ccfadc5b0c22514..5637a9b4d924023688fc153e1f5a9ea57d199794 100644 (file)
--- a/lib/crunchrun/singularity.go
+++ b/lib/crunchrun/singularity.go
@@ -246,6 +246,11 @@ func (e *singularityExecutor) Start() error {
         if !e.spec.EnableNetwork {
                 args = append(args, "--net", "--network=none")
         }
+
+       if e.spec.EnableCUDA {
+               args = append(args, "--nv")
+       }
+
         readonlyflag := map[bool]string{
                 false: "rw",
                 true:  "ro",
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go

index 7c68bdb20222f59067b5c5f1d89bad8ea6fef5fe..014fd6c2bdd27512849e37abf31ad2d2a2c5b5d0 100644 (file)
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -96,10 +96,14 @@ type Mount struct {
  // RuntimeConstraints specify a container's compute resources (RAM,
  // CPU) and network connectivity.
  type RuntimeConstraints struct {
-       API          bool  `json:"API"`
-       RAM          int64 `json:"ram"`
-       VCPUs        int   `json:"vcpus"`
-       KeepCacheRAM int64 `json:"keep_cache_ram"`
+       API                         bool     `json:"API"`
+       RAM                         int64    `json:"ram"`
+       VCPUs                       int      `json:"vcpus"`
+       KeepCacheRAM                int64    `json:"keep_cache_ram"`
+       CUDADriverVersion           string   `json:"cuda_driver_version"`
+       CUDACubinHardwareCapability []string `json:"cuda_cubin_hardware_capability"`
+       CUDAPTXHardwardCapability   string   `json:"cuda_ptx_hardware_capability"`
+       CUDADeviceCount             int      `json:"cuda_device_count"`
  }
  
  // SchedulingParameters specify a container's scheduling parameters
author	Peter Amstutz <peter.amstutz@curii.com>
	Mon, 22 Nov 2021 17:47:03 +0000 (12:47 -0500)
committer	Peter Amstutz <peter.amstutz@curii.com>
	Fri, 10 Dec 2021 16:24:50 +0000 (11:24 -0500)
lib/crunchrun/docker.go		patch | blob | history
lib/crunchrun/executor.go		patch | blob | history
lib/crunchrun/singularity.go		patch | blob | history
sdk/go/arvados/container.go		patch | blob | history