From 926c011fb4f7a4d7722b88a19afed51c5d4bd1c4 Mon Sep 17 00:00:00 2001
From: Peter Amstutz <peter.amstutz@curii.com>
Date: Mon, 28 Feb 2022 17:06:11 -0500
Subject: [PATCH] 18656: Update cwltool version and tests for CUDA extension
 tweaks

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
---
 .licenseignore                              |   3 +-
 sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml |  34 +++--
 sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml |  34 +++--
 sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml |  34 +++--
 sdk/cwl/arvados_cwl/arvcontainer.py         |   4 +-
 sdk/cwl/setup.py                            |   2 +-
 sdk/cwl/tests/test_container.py             | 136 ++++++++++++--------
 7 files changed, 159 insertions(+), 88 deletions(-)

diff --git a/.licenseignore b/.licenseignore
index e3289aa7c7..97ce38af93 100644
--- a/.licenseignore
+++ b/.licenseignore
@@ -87,4 +87,5 @@ sdk/python/tests/fed-migrate/*.cwl
 sdk/python/tests/fed-migrate/*.cwlex
 doc/install/*.xlsx
 sdk/cwl/tests/wf/hello.txt
-sdk/cwl/tests/wf/indir1/hello2.txt
\ No newline at end of file
+sdk/cwl/tests/wf/indir1/hello2.txt
+sdk/cwl/tests/chipseq/data/Genomes/*
\ No newline at end of file
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
index d5efa31a00..6e2d4f1d92 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -359,13 +359,29 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
-      doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+      doc: |
+        Minimum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMax`.  If neither are specified,
+        default 1.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: |
+        Maximum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
index 4a6b6947ff..0e81347d72 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -302,13 +302,29 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
-      doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+      doc: |
+        Minimum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMax`.  If neither are specified,
+        default 1.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: |
+        Maximum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
index e95b6543fd..e9f70bf1cf 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -304,13 +304,29 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
-      doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+      doc: |
+        Minimum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMax`.  If neither are specified,
+        default 1.
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
+      doc: |
+        Maximum number of GPU devices to request.  If not specified,
+        same as `cudaDeviceCountMin`.
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 753c2c2502..2a5ff3a13a 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -295,9 +295,9 @@ class ArvadosContainer(JobBase):
         cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
         if cuda_req:
             runtime_constraints["cuda"] = {
-                "device_count": cuda_req.get("deviceCountMin", 1),
+                "device_count": resources.get("cudaDeviceCount", 1),
                 "driver_version": cuda_req["cudaVersionMin"],
-                "hardware_capability": cuda_req["cudaComputeCapabilityMin"]
+                "hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0]
             }
 
         if self.timelimit is not None and self.timelimit > 0:
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index e126d170b7..c885ebd4b1 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -36,7 +36,7 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.1.20220217222804',
+          'cwltool==3.1.20220224085855',
           'schema-salad==8.2.20211116214159',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 72774daba3..bfffe6eacb 100644
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -1053,68 +1053,90 @@ class TestContainer(unittest.TestCase):
         runner.api.collections().get().execute.return_value = {
             "portable_data_hash": "99999999999999999999999999999993+99"}
 
-        tool = cmap({
-            "inputs": [],
-            "outputs": [],
-            "baseCommand": "nvidia-smi",
-            "arguments": [],
-            "id": "",
-            "cwlVersion": "v1.2",
-            "class": "CommandLineTool",
-            "requirements": [
-            {
+        test_cwl_req = [{
                 "class": "http://commonwl.org/cwltool#CUDARequirement",
                 "cudaVersionMin": "11.0",
-                "cudaComputeCapabilityMin": "9.0",
-            }
-        ]
-        })
+                "cudaComputeCapability": "9.0",
+            }, {
+                "class": "http://commonwl.org/cwltool#CUDARequirement",
+                "cudaVersionMin": "11.0",
+                "cudaComputeCapability": "9.0",
+                "cudaDeviceCountMin": 2
+            }, {
+                "class": "http://commonwl.org/cwltool#CUDARequirement",
+                "cudaVersionMin": "11.0",
+                "cudaComputeCapability": ["4.0", "5.0"],
+                "cudaDeviceCountMin": 2
+            }]
+
+        test_arv_req = [{
+            'device_count': 1,
+            'driver_version': "11.0",
+            'hardware_capability': "9.0"
+        }, {
+            'device_count': 2,
+            'driver_version': "11.0",
+            'hardware_capability': "9.0"
+        }, {
+            'device_count': 2,
+            'driver_version': "11.0",
+            'hardware_capability': "4.0"
+        }]
+
+        for test_case in range(0, len(test_cwl_req)):
 
-        loadingContext, runtimeContext = self.helper(runner, True)
+            tool = cmap({
+                "inputs": [],
+                "outputs": [],
+                "baseCommand": "nvidia-smi",
+                "arguments": [],
+                "id": "",
+                "cwlVersion": "v1.2",
+                "class": "CommandLineTool",
+                "requirements": [test_cwl_req[test_case]]
+            })
 
-        arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
-        arvtool.formatgraph = None
+            loadingContext, runtimeContext = self.helper(runner, True)
 
-        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
-            j.run(runtimeContext)
-            runner.api.container_requests().create.assert_called_with(
-                body=JsonDiffMatcher({
-                    'environment': {
-                        'HOME': '/var/spool/cwl',
-                        'TMPDIR': '/tmp'
-                    },
-                    'name': 'test_run_True',
-                    'runtime_constraints': {
-                        'vcpus': 1,
-                        'ram': 268435456,
-                        'cuda': {
-                            'device_count': 1,
-                            'driver_version': "11.0",
-                            'hardware_capability': "9.0"
-                        }
-                    },
-                    'use_existing': True,
-                    'priority': 500,
-                    'mounts': {
-                        '/tmp': {'kind': 'tmp',
-                                 "capacity": 1073741824
-                             },
-                        '/var/spool/cwl': {'kind': 'tmp',
-                                           "capacity": 1073741824 }
-                    },
-                    'state': 'Committed',
-                    'output_name': 'Output for step test_run_True',
-                    'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                    'output_path': '/var/spool/cwl',
-                    'output_ttl': 0,
-                    'container_image': '99999999999999999999999999999993+99',
-                    'command': ['nvidia-smi'],
-                    'cwd': '/var/spool/cwl',
-                    'scheduling_parameters': {},
-                    'properties': {},
-                    'secret_mounts': {},
-                    'output_storage_classes': ["default"]
-                }))
+            arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+            arvtool.formatgraph = None
+
+            for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+                j.run(runtimeContext)
+                runner.api.container_requests().create.assert_called_with(
+                    body=JsonDiffMatcher({
+                        'environment': {
+                            'HOME': '/var/spool/cwl',
+                            'TMPDIR': '/tmp'
+                        },
+                        'name': 'test_run_True' + ("" if test_case == 0 else "_"+str(test_case+1)),
+                        'runtime_constraints': {
+                            'vcpus': 1,
+                            'ram': 268435456,
+                            'cuda': test_arv_req[test_case]
+                        },
+                        'use_existing': True,
+                        'priority': 500,
+                        'mounts': {
+                            '/tmp': {'kind': 'tmp',
+                                     "capacity": 1073741824
+                                 },
+                            '/var/spool/cwl': {'kind': 'tmp',
+                                               "capacity": 1073741824 }
+                        },
+                        'state': 'Committed',
+                        'output_name': 'Output for step test_run_True' + ("" if test_case == 0 else "_"+str(test_case+1)),
+                        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                        'output_path': '/var/spool/cwl',
+                        'output_ttl': 0,
+                        'container_image': '99999999999999999999999999999993+99',
+                        'command': ['nvidia-smi'],
+                        'cwd': '/var/spool/cwl',
+                        'scheduling_parameters': {},
+                        'properties': {},
+                        'secret_mounts': {},
+                        'output_storage_classes': ["default"]
+                    }))
 
 
     # The test passes no builder.resources
-- 
2.30.2