Merge branch '8784-dir-listings'
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch_slurm.py
1 #!/usr/bin/env python
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 from __future__ import absolute_import, print_function
7
8 import subprocess
9 import time
10 import unittest
11
12 import mock
13
14 import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
15 from . import testutil
16 from .test_computenode_dispatch import ComputeNodeShutdownActorMixin, ComputeNodeUpdateActorTestCase
17
@mock.patch('subprocess.check_output')
class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                            unittest.TestCase):
    """Shutdown-actor tests run against the SLURM dispatch implementation.

    The class-level patch swaps subprocess.check_output for a mock in every
    test method and passes that mock in as the extra ``proc_mock`` argument.
    Helper methods whose names do not start with "test" are not wrapped by
    the patch, which is why they take the mock as an explicit parameter.
    """

    ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor

    def check_slurm_got_args(self, proc_mock, *args):
        """Assert the most recent mocked command contained every item in args.

        proc_mock -- the mock standing in for subprocess.check_output.
        args -- values that must each appear in the first positional
                argument of the mock's last call.
        """
        self.assertTrue(proc_mock.called)
        last_positional = proc_mock.call_args[0]
        slurm_cmd = last_positional[0]
        for expected in args:
            self.assertIn(expected, slurm_cmd)

    def check_success_after_reset(self, proc_mock, end_state='drain\n', timer=False):
        """Drive a shutdown of compute63 until the mock reports end_state.

        proc_mock -- the mock standing in for subprocess.check_output; the
                     caller configures its initial return value or side
                     effect before calling this helper.
        end_state -- output the mock returns once the two timer deliveries
                     are done.
        timer -- when false (the default), self.timer is replaced with
                 testutil.MockTimer(False) after make_mocks() has run.
        """
        arvados_node = testutil.arvados_node_mock(63)
        self.make_mocks(arvados_node=arvados_node)
        if not timer:
            self.timer = testutil.MockTimer(False)
        self.make_actor()
        # Two rounds of "flag still None, then deliver the pending timer".
        for _ in range(2):
            self.check_success_flag(None, 0)
            self.timer.deliver()
        # Keep this order.  The return value has to be in place before the
        # side effect is cleared: a mock called with neither configured may
        # end up invoking a real subprocess.
        proc_mock.return_value = end_state
        proc_mock.side_effect = None
        self.check_success_flag(True, 3)
        self.check_slurm_got_args(proc_mock, 'NodeName=compute63')

    def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
        """Build a test method that starts in start_state and ends in end_state.

        Called only while the class body executes (hence no ``self``); the
        returned function is stored under a ``test_...`` name below.
        """
        def test(self, proc_mock):
            # First report the starting state, then let the shared helper
            # switch the mock over to the final state.
            proc_mock.return_value = start_state
            self.check_success_after_reset(proc_mock, end_state=end_state)
        return test

    # Generates test_wait_while_alloc and test_wait_while_drng.
    for wait_state in ('alloc\n', 'drng\n'):
        locals()['test_wait_while_{}'.format(wait_state.strip())] = (
            make_wait_state_test(start_state=wait_state))

    # Generates one test_wait_until_<state> method per final state.
    for end_state in ('idle*\n', 'down\n', 'down*\n', 'drain\n', 'fail\n'):
        locals()['test_wait_until_{}'.format(end_state.strip())] = (
            make_wait_state_test(end_state=end_state))

    def test_retry_failed_slurm_calls(self, proc_mock):
        # Every mocked call raises CalledProcessError until the helper
        # clears the side effect.
        failure = subprocess.CalledProcessError(1, ["mock"])
        proc_mock.side_effect = failure
        self.check_success_after_reset(proc_mock)

    def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
        # A node that failed to bootstrap must still be handled correctly:
        # the shutdown succeeds and the mocked command is never called.
        proc_mock.return_value = 'down\n'
        self.make_actor(start_time=0)
        self.check_success_flag(True)
        self.assertFalse(proc_mock.called)

    def test_node_undrained_when_shutdown_cancelled(self, proc_mock):
        try:
            # One canned output per expected command, in call order.
            canned_outputs = ['', 'drng\n', 'drng\n', '']
            proc_mock.side_effect = iter(canned_outputs)
            busy_node = testutil.arvados_node_mock(job_uuid=True)
            self.make_mocks(arvados_node=busy_node)
            self.timer = testutil.MockTimer(False)
            self.make_actor()
            # Wait for the first mocked command before cancelling.
            self.busywait(lambda: proc_mock.call_args is not None)
            cancel_future = self.shutdown_actor.cancel_shutdown("test")
            cancel_future.get(self.TIMEOUT)
            self.check_success_flag(False, 2)
            drain_call = mock.call(['scontrol', 'update', 'NodeName=compute99',
                                    'State=DRAIN',
                                    'Reason=Node Manager shutdown'])
            sinfo_call = mock.call(['sinfo', '--noheader', '-o', '%t',
                                    '-n', 'compute99'])
            resume_call = mock.call(['scontrol', 'update',
                                     'NodeName=compute99', 'State=RESUME'])
            self.assertEqual(proc_mock.call_args_list,
                             [drain_call, sinfo_call, sinfo_call, resume_call])
        finally:
            self.shutdown_actor.actor_ref.stop()

    def test_cancel_shutdown_retry(self, proc_mock):
        # OSError entries are raised by the mock; strings are returned.
        scripted_results = [OSError, 'drain\n', OSError, 'idle\n', 'idle\n']
        proc_mock.side_effect = iter(scripted_results)
        busy_node = testutil.arvados_node_mock(job_uuid=True)
        self.make_mocks(arvados_node=busy_node)
        self.make_actor()
        self.check_success_flag(False, 2)

    def test_issue_slurm_drain_retry(self, proc_mock):
        # OSError entries are raised by the mock; strings are returned.
        scripted_results = [OSError, '', OSError, 'drng\n']
        proc_mock.side_effect = iter(scripted_results)
        self.check_success_after_reset(proc_mock, timer=False)

    def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
        # Run the inherited test with the mocked command reporting 'drain'.
        proc_mock.return_value = 'drain\n'
        inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
        inherited.test_arvados_node_cleaned_after_shutdown()

    def test_cancellable_shutdown(self, proc_mock):
        # Run the inherited test with the mocked command reporting 'other'.
        proc_mock.return_value = 'other\n'
        inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
        inherited.test_cancellable_shutdown()

    def test_uncancellable_shutdown(self, proc_mock):
        # Run the inherited test with the mocked command reporting 'other'.
        proc_mock.return_value = 'other\n'
        inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
        inherited.test_uncancellable_shutdown()

111
@mock.patch('subprocess.check_output')
class SLURMComputeNodeUpdateActorTestCase(ComputeNodeUpdateActorTestCase):
    """Update-actor tests run against the SLURM dispatch implementation.

    The class-level patch swaps subprocess.check_output for a mock, which
    each test method receives as its extra argument.
    """

    ACTOR_CLASS = slurm_dispatch.ComputeNodeUpdateActor

    def test_update_node_weight(self, check_output):
        # Syncing a node must end with an scontrol command that sets the
        # node's weight.
        self.make_actor()
        cloud_node = testutil.cloud_node_mock()
        arv_node = testutil.arvados_node_mock()
        sync_future = self.updater.sync_node(cloud_node, arv_node)
        sync_future.get(self.TIMEOUT)
        expected_cmd = ['scontrol', 'update', 'NodeName=compute99',
                        'Weight=99000']
        check_output.assert_called_with(expected_cmd)