22317: Replace compute image base.sh with an Ansible playbook
[arvados.git] / tools / compute-images / ansible / roles / compute_nvidia / tasks / main.yml
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
# Installs the arvados-nvidia.pref apt preferences file. Skipped unless
# arvados_compute_pin_packages evaluates to true.
- name: Install NVIDIA package pins
  when: arvados_compute_pin_packages | bool
  ansible.builtin.copy:
    src: arvados-nvidia.pref
    dest: /etc/apt/preferences.d/arvados-nvidia.pref
    owner: root
    group: root
    # Quoted so the mode reaches Ansible as an octal string instead of
    # depending on how the YAML parser types a bare 0644.
    mode: "0644"
13
# Installs NVIDIA's cuda-keyring package straight from its download URL.
# The result is registered so a later task can decide whether the apt
# cache needs refreshing.
- name: Install NVIDIA CUDA apt repository
  vars:
    # Repository directory name: lowercased distribution followed by the
    # release number. On Ubuntu the full release is used with dots removed
    # (e.g. "22.04" -> "2204"); elsewhere only the major version is used.
    # This is built from ansible_distribution_version rather than
    # ansible_distribution_minor_version, which is not reliably defined
    # on Ubuntu hosts.
    cuda_repo_distro: >-
      {{ ansible_distribution | lower }}{{
      (ansible_distribution_version | replace('.', ''))
      if ansible_distribution == 'Ubuntu'
      else ansible_distribution_major_version }}
  ansible.builtin.apt:
    deb: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo_distro }}/{{ ansible_architecture }}/cuda-keyring_1.1-1_all.deb"
  register: cuda_apt_task
18
# Registers a deb822-format apt source for the NVIDIA container toolkit.
# Both the repository URI and its signing key are derived from
# nvidia_container_apt_url. The result is registered so a later task can
# decide whether the apt cache needs refreshing.
- name: Install NVIDIA container toolkit apt repository
  register: nvidia_apt_task
  ansible.builtin.deb822_repository:
    name: nvidia-container-toolkit
    types: deb
    # $(ARCH) is passed through literally; it is not a Jinja expression.
    uris: "{{ nvidia_container_apt_url }}/stable/deb/$(ARCH)"
    suites: '/'
    signed_by: "{{ nvidia_container_apt_url }}/gpgkey"
27
# Installs CUDA and the NVIDIA container runtime packages.
- name: Install NVIDIA packages
  ansible.builtin.apt:
    name:
      - cuda
      - libnvidia-container1
      - libnvidia-container-tools
      - nvidia-container-toolkit
    # Refresh the package index only if either repository task
    # (cuda_apt_task, nvidia_apt_task) reported a change.
    update_cache: "{{ cuda_apt_task.changed or nvidia_apt_task.changed }}"
36
# nvidia.conf is deleted later in this role, so on a repeat run it no
# longer exists. Check for it first so the copy below is skipped instead
# of failing on a missing source file.
- name: Check for nvidia.conf modules list
  ansible.builtin.stat:
    path: /etc/modules-load.d/nvidia.conf
  register: nvidia_modules_conf

# Keeps a copy of the module list under a name that does not end in .conf.
# NOTE(review): presumably systemd-modules-load ignores the .avail file and
# detect-gpu.sh reads it; confirm against that script.
- name: Copy nvidia.conf modules to nvidia.avail
  when: nvidia_modules_conf.stat.exists
  ansible.builtin.copy:
    src: /etc/modules-load.d/nvidia.conf
    dest: /etc/modules-load.d/nvidia.avail
    # Source path is on the managed host, not the controller.
    remote_src: true
42
# Deletes the modules-load.d entry for the NVIDIA modules. state: absent
# is a no-op when the file is already gone.
- name: Remove nvidia.conf modules from autoloading
  ansible.builtin.file:
    state: absent
    path: /etc/modules-load.d/nvidia.conf
47
# Installs the role's detect-gpu.sh as a root-owned executable under
# /usr/local/sbin.
- name: Install dynamic module loading script
  ansible.builtin.copy:
    src: detect-gpu.sh
    dest: /usr/local/sbin/detect-gpu.sh
    owner: root
    group: root
    # Quoted so the mode reaches Ansible as an octal string instead of
    # depending on how the YAML parser types a bare 0755.
    mode: "0755"
55
# Ensures the drop-in directory for systemd-modules-load.service exists so
# an override file can be placed in it.
- name: Prepare systemd-modules-load override directory
  ansible.builtin.file:
    path: /etc/systemd/system/systemd-modules-load.service.d
    state: directory
    owner: root
    group: root
    # Quoted so the mode reaches Ansible as an octal string instead of
    # depending on how the YAML parser types a bare 0755.
    mode: "0755"
63
# Installs the role's detect-gpu.conf as a drop-in override for
# systemd-modules-load.service.
- name: Install dynamic module load hook
  ansible.builtin.copy:
    src: detect-gpu.conf
    dest: /etc/systemd/system/systemd-modules-load.service.d/arvados-detect-gpu.conf
    owner: root
    group: root
    # Quoted so the mode reaches Ansible as an octal string instead of
    # depending on how the YAML parser types a bare 0644.
    mode: "0644"
71
# crunch-run has its own CUDA initialization code.
# We prefer to use that over NVIDIA's.
#
# Passing only the unit name makes this a read-only lookup: nothing is
# started, stopped, or enabled. The registered result exposes the unit's
# status properties to later tasks.
- name: Query nvidia-persistenced.service
  register: nvidia_persistenced
  ansible.builtin.systemd_service:
    name: nvidia-persistenced.service
78
# Disables the unit, but only when the earlier lookup registered in
# nvidia_persistenced found it (a LoadState other than 'not-found').
- name: Disable nvidia-persistenced.service
  ansible.builtin.systemd_service:
    name: nvidia-persistenced.service
    enabled: false
  when: nvidia_persistenced.status.LoadState != 'not-found'