From 926c011fb4f7a4d7722b88a19afed51c5d4bd1c4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 28 Feb 2022 17:06:11 -0500 Subject: [PATCH] 18656: Update cwltool version and tests for CUDA extension tweaks Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .licenseignore | 3 +- sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml | 34 +++-- sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml | 34 +++-- sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml | 34 +++-- sdk/cwl/arvados_cwl/arvcontainer.py | 4 +- sdk/cwl/setup.py | 2 +- sdk/cwl/tests/test_container.py | 136 ++++++++++++-------- 7 files changed, 159 insertions(+), 88 deletions(-) diff --git a/.licenseignore b/.licenseignore index e3289aa7c7..97ce38af93 100644 --- a/.licenseignore +++ b/.licenseignore @@ -87,4 +87,5 @@ sdk/python/tests/fed-migrate/*.cwl sdk/python/tests/fed-migrate/*.cwlex doc/install/*.xlsx sdk/cwl/tests/wf/hello.txt -sdk/cwl/tests/wf/indir1/hello2.txt \ No newline at end of file +sdk/cwl/tests/wf/indir1/hello2.txt +sdk/cwl/tests/chipseq/data/Genomes/* \ No newline at end of file diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml index d5efa31a00..6e2d4f1d92 100644 --- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml +++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml @@ -359,13 +359,29 @@ $graph: See https://docs.nvidia.com/deploy/cuda-compatibility/ for details. - cudaComputeCapabilityMin: - type: string - doc: Minimum CUDA hardware capability required to run the software, in X.Y format. - deviceCountMin: - type: int? + cudaComputeCapability: + type: + - 'string' + - 'string[]' + doc: | + CUDA hardware capability required to run the software, in X.Y + format. + + * If this is a single value, it defines only the minimum + compute capability. GPUs with higher capability are also + accepted. + + * If it is an array value, then only select GPUs with compute + capabilities that explicitly appear in the array. 
+ cudaDeviceCountMin: + type: ['null', int, cwl:Expression] default: 1 - doc: Minimum number of GPU devices to request, default 1. - deviceCountMax: - type: int? - doc: Maximum number of GPU devices to request. If not specified, same as `deviceCountMin`. + doc: | + Minimum number of GPU devices to request. If not specified, + same as `cudaDeviceCountMax`. If neither is specified, + default 1. + cudaDeviceCountMax: + type: ['null', int, cwl:Expression] + doc: | + Maximum number of GPU devices to request. If not specified, + same as `cudaDeviceCountMin`. diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml index 4a6b6947ff..0e81347d72 100644 --- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml +++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml @@ -302,13 +302,29 @@ $graph: See https://docs.nvidia.com/deploy/cuda-compatibility/ for details. - cudaComputeCapabilityMin: - type: string - doc: Minimum CUDA hardware capability required to run the software, in X.Y format. - deviceCountMin: - type: int? + cudaComputeCapability: + type: + - 'string' + - 'string[]' + doc: | + CUDA hardware capability required to run the software, in X.Y + format. + + * If this is a single value, it defines only the minimum + compute capability. GPUs with higher capability are also + accepted. + + * If it is an array value, then only select GPUs with compute + capabilities that explicitly appear in the array. + cudaDeviceCountMin: + type: ['null', int, cwl:Expression] default: 1 - doc: Minimum number of GPU devices to request, default 1. - deviceCountMax: - type: int? - doc: Maximum number of GPU devices to request. If not specified, same as `deviceCountMin`. + doc: | + Minimum number of GPU devices to request. If not specified, + same as `cudaDeviceCountMax`. If neither is specified, + default 1. + cudaDeviceCountMax: + type: ['null', int, cwl:Expression] + doc: | + Maximum number of GPU devices to request. 
If not specified, + same as `cudaDeviceCountMin`. diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml index e95b6543fd..e9f70bf1cf 100644 --- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml +++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml @@ -304,13 +304,29 @@ $graph: See https://docs.nvidia.com/deploy/cuda-compatibility/ for details. - cudaComputeCapabilityMin: - type: string - doc: Minimum CUDA hardware capability required to run the software, in X.Y format. - deviceCountMin: - type: int? + cudaComputeCapability: + type: + - 'string' + - 'string[]' + doc: | + CUDA hardware capability required to run the software, in X.Y + format. + + * If this is a single value, it defines only the minimum + compute capability. GPUs with higher capability are also + accepted. + + * If it is an array value, then only select GPUs with compute + capabilities that explicitly appear in the array. + cudaDeviceCountMin: + type: ['null', int, cwl:Expression] default: 1 - doc: Minimum number of GPU devices to request, default 1. - deviceCountMax: - type: int? - doc: Maximum number of GPU devices to request. If not specified, same as `deviceCountMin`. + doc: | + Minimum number of GPU devices to request. If not specified, + same as `cudaDeviceCountMax`. If neither is specified, + default 1. + cudaDeviceCountMax: + type: ['null', int, cwl:Expression] + doc: | + Maximum number of GPU devices to request. If not specified, + same as `cudaDeviceCountMin`. 
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index 753c2c2502..2a5ff3a13a 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -295,9 +295,9 @@ class ArvadosContainer(JobBase): cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement") if cuda_req: runtime_constraints["cuda"] = { - "device_count": cuda_req.get("deviceCountMin", 1), + "device_count": resources.get("cudaDeviceCount", 1), "driver_version": cuda_req["cudaVersionMin"], - "hardware_capability": cuda_req["cudaComputeCapabilityMin"] + "hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0] } if self.timelimit is not None and self.timelimit > 0: diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index e126d170b7..c885ebd4b1 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -36,7 +36,7 @@ setup(name='arvados-cwl-runner', # file to determine what version of cwltool and schema-salad to # build. install_requires=[ - 'cwltool==3.1.20220217222804', + 'cwltool==3.1.20220224085855', 'schema-salad==8.2.20211116214159', 'arvados-python-client{}'.format(pysdk_dep), 'setuptools', diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py index 72774daba3..bfffe6eacb 100644 --- a/sdk/cwl/tests/test_container.py +++ b/sdk/cwl/tests/test_container.py @@ -1053,68 +1053,90 @@ class TestContainer(unittest.TestCase): runner.api.collections().get().execute.return_value = { "portable_data_hash": "99999999999999999999999999999993+99"} - tool = cmap({ - "inputs": [], - "outputs": [], - "baseCommand": "nvidia-smi", - "arguments": [], - "id": "", - "cwlVersion": "v1.2", - "class": "CommandLineTool", - "requirements": [ - { + test_cwl_req = [{ "class": "http://commonwl.org/cwltool#CUDARequirement", "cudaVersionMin": "11.0", - "cudaComputeCapabilityMin": "9.0", - } - ] - }) + "cudaComputeCapability": "9.0", + }, { + "class": "http://commonwl.org/cwltool#CUDARequirement", + "cudaVersionMin": 
"11.0", + "cudaComputeCapability": "9.0", + "cudaDeviceCountMin": 2 + }, { + "class": "http://commonwl.org/cwltool#CUDARequirement", + "cudaVersionMin": "11.0", + "cudaComputeCapability": ["4.0", "5.0"], + "cudaDeviceCountMin": 2 + }] + + test_arv_req = [{ + 'device_count': 1, + 'driver_version': "11.0", + 'hardware_capability': "9.0" + }, { + 'device_count': 2, + 'driver_version': "11.0", + 'hardware_capability': "9.0" + }, { + 'device_count': 2, + 'driver_version': "11.0", + 'hardware_capability': "4.0" + }] + + for test_case in range(0, len(test_cwl_req)): - loadingContext, runtimeContext = self.helper(runner, True) + tool = cmap({ + "inputs": [], + "outputs": [], + "baseCommand": "nvidia-smi", + "arguments": [], + "id": "", + "cwlVersion": "v1.2", + "class": "CommandLineTool", + "requirements": [test_cwl_req[test_case]] + }) - arvtool = cwltool.load_tool.load_tool(tool, loadingContext) - arvtool.formatgraph = None + loadingContext, runtimeContext = self.helper(runner, True) - for j in arvtool.job({}, mock.MagicMock(), runtimeContext): - j.run(runtimeContext) - runner.api.container_requests().create.assert_called_with( - body=JsonDiffMatcher({ - 'environment': { - 'HOME': '/var/spool/cwl', - 'TMPDIR': '/tmp' - }, - 'name': 'test_run_True', - 'runtime_constraints': { - 'vcpus': 1, - 'ram': 268435456, - 'cuda': { - 'device_count': 1, - 'driver_version': "11.0", - 'hardware_capability': "9.0" - } - }, - 'use_existing': True, - 'priority': 500, - 'mounts': { - '/tmp': {'kind': 'tmp', - "capacity": 1073741824 - }, - '/var/spool/cwl': {'kind': 'tmp', - "capacity": 1073741824 } - }, - 'state': 'Committed', - 'output_name': 'Output for step test_run_True', - 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz', - 'output_path': '/var/spool/cwl', - 'output_ttl': 0, - 'container_image': '99999999999999999999999999999993+99', - 'command': ['nvidia-smi'], - 'cwd': '/var/spool/cwl', - 'scheduling_parameters': {}, - 'properties': {}, - 'secret_mounts': {}, - 'output_storage_classes': 
["default"] - })) + arvtool = cwltool.load_tool.load_tool(tool, loadingContext) + arvtool.formatgraph = None + + for j in arvtool.job({}, mock.MagicMock(), runtimeContext): + j.run(runtimeContext) + runner.api.container_requests().create.assert_called_with( + body=JsonDiffMatcher({ + 'environment': { + 'HOME': '/var/spool/cwl', + 'TMPDIR': '/tmp' + }, + 'name': 'test_run_True' + ("" if test_case == 0 else "_"+str(test_case+1)), + 'runtime_constraints': { + 'vcpus': 1, + 'ram': 268435456, + 'cuda': test_arv_req[test_case] + }, + 'use_existing': True, + 'priority': 500, + 'mounts': { + '/tmp': {'kind': 'tmp', + "capacity": 1073741824 + }, + '/var/spool/cwl': {'kind': 'tmp', + "capacity": 1073741824 } + }, + 'state': 'Committed', + 'output_name': 'Output for step test_run_True' + ("" if test_case == 0 else "_"+str(test_case+1)), + 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz', + 'output_path': '/var/spool/cwl', + 'output_ttl': 0, + 'container_image': '99999999999999999999999999999993+99', + 'command': ['nvidia-smi'], + 'cwd': '/var/spool/cwl', + 'scheduling_parameters': {}, + 'properties': {}, + 'secret_mounts': {}, + 'output_storage_classes': ["default"] + })) # The test passes no builder.resources -- 2.30.2