Merge branch '9486-retry-instance-limit-exceeded' closes #9486
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch_slurm.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import subprocess
6 import time
7 import unittest
8
9 import mock
10
11 import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
12 from . import testutil
13 from .test_computenode_dispatch import ComputeNodeShutdownActorMixin
14
@mock.patch('subprocess.check_output')
class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                            unittest.TestCase):
    """Tests for the SLURM flavor of the compute node shutdown actor.

    The class-level patch replaces subprocess.check_output for every test
    method, which is why each test takes an extra proc_mock argument: it
    stands in for the command-line calls (the expected calls asserted in
    this class are to `scontrol` and `sinfo`).  The remaining test
    machinery (make_mocks, make_actor, check_success_flag, ...) comes from
    ComputeNodeShutdownActorMixin, defined in another module.
    """
    # Actor implementation under test.
    ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor
19
20     def check_slurm_got_args(self, proc_mock, *args):
21         self.assertTrue(proc_mock.called)
22         slurm_cmd = proc_mock.call_args[0][0]
23         for s in args:
24             self.assertIn(s, slurm_cmd)
25
26     def check_success_after_reset(self, proc_mock, end_state='drain\n'):
27         self.make_mocks(arvados_node=testutil.arvados_node_mock(63))
28         self.make_actor()
29         self.check_success_flag(None, 0)
30         self.check_success_flag(None, 0)
31         # Order is critical here: if the mock gets called when no return value
32         # or side effect is set, we may invoke a real subprocess.
33         proc_mock.return_value = end_state
34         proc_mock.side_effect = None
35         self.check_success_flag(True, 3)
36         self.check_slurm_got_args(proc_mock, 'compute63')
37
38     def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
39         def test(self, proc_mock):
40             proc_mock.return_value = start_state
41             self.check_success_after_reset(proc_mock, end_state)
42         return test
43
44     for wait_state in ['alloc\n', 'drng\n']:
45         locals()['test_wait_while_' + wait_state.strip()
46                  ] = make_wait_state_test(start_state=wait_state)
47
48     for end_state in ['idle*\n', 'down\n', 'down*\n', 'drain\n', 'fail\n']:
49         locals()['test_wait_until_' + end_state.strip()
50                  ] = make_wait_state_test(end_state=end_state)
51
52     def test_retry_failed_slurm_calls(self, proc_mock):
53         proc_mock.side_effect = subprocess.CalledProcessError(1, ["mock"])
54         self.check_success_after_reset(proc_mock)
55
56     def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
57         # Test we correctly handle a node that failed to bootstrap.
58         proc_mock.return_value = 'down\n'
59         self.make_actor(start_time=0)
60         self.check_success_flag(True)
61         self.assertFalse(proc_mock.called)
62
63     def test_node_undrained_when_shutdown_cancelled(self, proc_mock):
64         try:
65             proc_mock.side_effect = iter(['', 'drng\n', 'drng\n', ''])
66             self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
67             self.timer = testutil.MockTimer(False)
68             self.make_actor()
69             self.busywait(lambda: proc_mock.call_args is not None)
70             self.shutdown_actor.cancel_shutdown("test").get(self.TIMEOUT)
71             self.check_success_flag(False, 2)
72             self.assertEqual(proc_mock.call_args_list,
73                              [mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']),
74                               mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
75                               mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
76                               mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME'])])
77         finally:
78             self.shutdown_actor.actor_ref.stop()
79
80     def test_cancel_shutdown_retry(self, proc_mock):
81         proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
82         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
83         self.make_actor()
84         self.check_success_flag(False, 2)
85
86     def test_issue_slurm_drain_retry(self, proc_mock):
87         proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
88         self.check_success_after_reset(proc_mock)
89
90     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
91         proc_mock.return_value = 'drain\n'
92         super(SLURMComputeNodeShutdownActorTestCase,
93               self).test_arvados_node_cleaned_after_shutdown()