Merge branch '18323-cwl-gpu2' refs #18323

author Peter Amstutz <peter.amstutz@curii.com>

Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)

committer Peter Amstutz <peter.amstutz@curii.com>

Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)
author Peter Amstutz <peter.amstutz@curii.com>
Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)
diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid

index dcddace14819326459d2efed9a4f9cfcdd0ef74a..0580dca289f431a10ebaab322833368ddd3ef107 100644 (file)
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -59,10 +59,10 @@ hints:
        property2: $(inputs.value2)
  
    arv:CUDARequirement:
-    minCUDADriverVersion: "11.0"
-    minCUDAHardwareCapability: "9.0"
-    minDeviceCount: 1
-    maxDeviceCount: 1
+    cudaVersionMin: "11.0"
+    cudaComputeCapabilityMin: "9.0"
+    deviceCountMin: 1
+    deviceCountMax: 1
  {% endcodeblock %}
  
  h2(#RunInSingleContainer). arv:RunInSingleContainer
@@ -159,10 +159,10 @@ Request support for Nvidia CUDA GPU acceleration in the container.  Assumes that
  
  table(table table-bordered table-condensed).
  |_. Field |_. Type |_. Description |
-|minCUDADriverVersion|string|Required.  The CUDA SDK version corresponding to the minimum driver version supported by the container (generally, the SDK version 'X.Y' the application was compiled against).|
-|minCUDAHardwareCapability|string|Required.  The minimum CUDA hardware capability (in 'X.Y' format) required by the application's PTX or C++ GPU code (will be JIT compiled for the available hardware).|
-|minDeviceCount|integer|Minimum number of GPU devices to allocate on a single node. Required.|
-|maxDeviceCount|integer|Maximum number of GPU devices to allocate on a single node. Optional.  If not specified, same as @minDeviceCount@.|
+|cudaVersionMin|string|Required.  The CUDA SDK version corresponding to the minimum driver version supported by the container (generally, the SDK version 'X.Y' the application was compiled against).|
+|cudaComputeCapabilityMin|string|Required.  The minimum CUDA hardware capability (in 'X.Y' format) required by the application's PTX or C++ GPU code (will be JIT compiled for the available hardware).|
+|deviceCountMin|integer|Minimum number of GPU devices to allocate on a single node. Required.|
+|deviceCountMax|integer|Maximum number of GPU devices to allocate on a single node. Optional.  If not specified, same as @deviceCountMin@.|
  
  h2. arv:dockerCollectionPDH
  
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py

index df5866d67db3132dd5551ada3ddb101a2bd95a3c..c3848b2629188c9cf687e8b69f054237b9e5b77e 100644 (file)
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -251,7 +251,7 @@ def add_arv_hints():
          "http://arvados.org/cwl#ClusterTarget",
          "http://arvados.org/cwl#OutputStorageClass",
          "http://arvados.org/cwl#ProcessProperties",
-        "http://arvados.org/cwl#CUDARequirement"
+        "http://commonwl.org/cwltool#CUDARequirement"
      ])
  
  def exit_signal_handler(sigcode, frame):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml

index 6f5e7980dbb7576918b97e736abbdf4240e91748..d5efa31a00c735b5380a63083d4789088d59d563 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -331,29 +331,41 @@ $graph:
          mapPredicate: propertyValue
  
  
-- name: CUDARequirement
+- name: cwltool:CUDARequirement
    type: record
    extends: cwl:ProcessRequirement
    inVocab: false
    doc: |
-    Require support for Nvidia CUDA (GPU hardware acceleration).
+    Require support for NVIDIA CUDA (GPU hardware acceleration).
    fields:
      class:
        type: string
-      doc: 'arv:CUDARequirement'
+      doc: 'cwltool:CUDARequirement'
        jsonldPredicate:
          _id: "@type"
          _type: "@vocab"
-    minCUDADriverVersion:
+    cudaVersionMin:
        type: string
-      doc: Minimum CUDA driver version to run the software, in X.Y format of the associated CUDA SDK release.
-    minCUDAHardwareCapability:
+      doc: |
+        Minimum CUDA version to run the software, in X.Y format.  This
+        corresponds to a CUDA SDK release.  When running directly on
+        the host (not in a container) the host must have a compatible
+        CUDA SDK (matching the exact version, or, starting with CUDA
+        11.3, matching major version).  When run in a container, the
+        container image should provide the CUDA runtime, and the host
+        driver is injected into the container.  In this case, because
+        CUDA drivers are backwards compatible, it is possible to
+        use an older SDK with a newer driver across major versions.
+
+        See https://docs.nvidia.com/deploy/cuda-compatibility/ for
+        details.
+    cudaComputeCapabilityMin:
        type: string
        doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    minDeviceCount:
+    deviceCountMin:
        type: int?
        default: 1
-      doc: Minimum number of GPU devices to request, or 1.
-    maxDeviceCount:
+      doc: Minimum number of GPU devices to request, default 1.
+    deviceCountMax:
        type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `minDeviceCount`.
+      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml

index 5dbb838f1a2e4a81bd9ad196a5e203743cdc3ef7..4a6b6947ff4c6c05487be4e00a6ac734a52ff33f 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -274,29 +274,41 @@ $graph:
          mapPredicate: propertyValue
  
  
-- name: CUDARequirement
+- name: cwltool:CUDARequirement
    type: record
    extends: cwl:ProcessRequirement
    inVocab: false
    doc: |
-    Require support for Nvidia CUDA (GPU hardware acceleration).
+    Require support for NVIDIA CUDA (GPU hardware acceleration).
    fields:
      class:
        type: string
-      doc: 'arv:CUDARequirement'
+      doc: 'cwltool:CUDARequirement'
        jsonldPredicate:
          _id: "@type"
          _type: "@vocab"
-    minCUDADriverVersion:
+    cudaVersionMin:
        type: string
-      doc: Minimum CUDA driver version to run the software, in X.Y format of the associated CUDA SDK release.
-    minCUDAHardwareCapability:
+      doc: |
+        Minimum CUDA version to run the software, in X.Y format.  This
+        corresponds to a CUDA SDK release.  When running directly on
+        the host (not in a container) the host must have a compatible
+        CUDA SDK (matching the exact version, or, starting with CUDA
+        11.3, matching major version).  When run in a container, the
+        container image should provide the CUDA runtime, and the host
+        driver is injected into the container.  In this case, because
+        CUDA drivers are backwards compatible, it is possible to
+        use an older SDK with a newer driver across major versions.
+
+        See https://docs.nvidia.com/deploy/cuda-compatibility/ for
+        details.
+    cudaComputeCapabilityMin:
        type: string
        doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    minDeviceCount:
+    deviceCountMin:
        type: int?
        default: 1
-      doc: Minimum number of GPU devices to request, or 1.
-    maxDeviceCount:
+      doc: Minimum number of GPU devices to request, default 1.
+    deviceCountMax:
        type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `minDeviceCount`.
+      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml

index 5fbe5bd7f7c5327e41078014ed77b30729b84b8f..e95b6543fdb5529ef03e41318c54e49935db0fc7 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -276,29 +276,41 @@ $graph:
          mapPredicate: propertyValue
  
  
-- name: CUDARequirement
+- name: cwltool:CUDARequirement
    type: record
    extends: cwl:ProcessRequirement
    inVocab: false
    doc: |
-    Require support for Nvidia CUDA (GPU hardware acceleration).
+    Require support for NVIDIA CUDA (GPU hardware acceleration).
    fields:
      class:
        type: string
-      doc: 'arv:CUDARequirement'
+      doc: 'cwltool:CUDARequirement'
        jsonldPredicate:
          _id: "@type"
          _type: "@vocab"
-    minCUDADriverVersion:
+    cudaVersionMin:
        type: string
-      doc: Minimum CUDA driver version to run the software, in X.Y format of the associated CUDA SDK release.
-    minCUDAHardwareCapability:
+      doc: |
+        Minimum CUDA version to run the software, in X.Y format.  This
+        corresponds to a CUDA SDK release.  When running directly on
+        the host (not in a container) the host must have a compatible
+        CUDA SDK (matching the exact version, or, starting with CUDA
+        11.3, matching major version).  When run in a container, the
+        container image should provide the CUDA runtime, and the host
+        driver is injected into the container.  In this case, because
+        CUDA drivers are backwards compatible, it is possible to
+        use an older SDK with a newer driver across major versions.
+
+        See https://docs.nvidia.com/deploy/cuda-compatibility/ for
+        details.
+    cudaComputeCapabilityMin:
        type: string
        doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    minDeviceCount:
+    deviceCountMin:
        type: int?
        default: 1
-      doc: Minimum number of GPU devices to request, or 1.
-    maxDeviceCount:
+      doc: Minimum number of GPU devices to request, default 1.
+    deviceCountMax:
        type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `minDeviceCount`.
+      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py

index 6372caaa38b182db35dc927af5d7b5e04a1ed93d..3c7e9cfaa6d5e91eed03b03222522db9a96c83f9 100644 (file)
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -291,12 +291,12 @@ class ArvadosContainer(JobBase):
              else:
                  container_request["output_storage_classes"] = runtimeContext.intermediate_storage_classes.strip().split(",")
  
-        cuda_req, _ = self.get_requirement("http://arvados.org/cwl#CUDARequirement")
+        cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
          if cuda_req:
              runtime_constraints["cuda"] = {
-                "device_count": cuda_req.get("minDeviceCount", 1),
-                "driver_version": cuda_req["minCUDADriverVersion"],
-                "hardware_capability": cuda_req["minCUDAHardwareCapability"]
+                "device_count": cuda_req.get("deviceCountMin", 1),
+                "driver_version": cuda_req["cudaVersionMin"],
+                "hardware_capability": cuda_req["cudaComputeCapabilityMin"]
              }
  
          if self.timelimit is not None and self.timelimit > 0:
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py

index e97572cd93aa4cc2721772b53b2759d028ccc13d..21ae4a7af0372aada304472b7da5e629e0a7956b 100644 (file)
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -1061,11 +1061,11 @@ class TestContainer(unittest.TestCase):
              "id": "",
              "cwlVersion": "v1.2",
              "class": "CommandLineTool",
-            "hints": [
+            "requirements": [
              {
-                "class": "http://arvados.org/cwl#CUDARequirement",
-                "minCUDADriverVersion": "11.0",
-                "minCUDAHardwareCapability": "9.0",
+                "class": "http://commonwl.org/cwltool#CUDARequirement",
+                "cudaVersionMin": "11.0",
+                "cudaComputeCapabilityMin": "9.0",
              }
          ]
          })
author	Peter Amstutz <peter.amstutz@curii.com>
	Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)
committer	Peter Amstutz <peter.amstutz@curii.com>
	Wed, 5 Jan 2022 21:50:23 +0000 (16:50 -0500)
doc/user/cwl/cwl-extensions.html.textile.liquid		patch \| blob \| history
sdk/cwl/arvados_cwl/__init__.py		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arvcontainer.py		patch \| blob \| history
sdk/cwl/tests/test_container.py		patch \| blob \| history