Merge branch 'master' into 7167-keep-rsync
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch_slurm.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import subprocess
6 import unittest
7
8 import mock
9
10 import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
11 from . import testutil
12 from .test_computenode_dispatch import ComputeNodeShutdownActorMixin
13
@mock.patch('subprocess.check_output')
class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                            unittest.TestCase):
    """Shutdown-actor tests run against the SLURM dispatch actor class.

    The class-level patch replaces ``subprocess.check_output`` for each test
    method and hands the replacement to the test as ``proc_mock``.
    """
    ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor

    def check_slurm_got_args(self, proc_mock, *args):
        """Assert that the mock's latest command contains every given item.

        Fails if ``proc_mock`` was never called.  Only the first positional
        argument of the most recent call is inspected.
        """
        self.assertTrue(proc_mock.called)
        positional_args = proc_mock.call_args[0]
        latest_command = positional_args[0]
        for wanted in args:
            self.assertIn(wanted, latest_command)

    def check_success_after_reset(self, proc_mock, end_state='drain\n'):
        """Drive a shutdown that succeeds only once the mock is reset.

        Builds the mocks and actor around ``arvados_node_mock(63)``, checks
        the success flag against None twice, then makes ``proc_mock`` return
        ``end_state`` and checks the flag against True and the latest command
        for 'compute63'.  ``check_success_flag`` is not defined in this
        class (presumably inherited from the mixin); its arguments are
        passed through unchanged.
        """
        node_record = testutil.arvados_node_mock(63)
        self.make_mocks(arvados_node=node_record)
        self.make_actor()
        for _ in range(2):
            self.check_success_flag(None, 0)
        # Keep these two assignments in this order.  Clearing the side effect
        # before a return value is in place could leave the mock with
        # neither, and a call at that moment may run a real subprocess.
        proc_mock.return_value = end_state
        proc_mock.side_effect = None
        self.check_success_flag(True, 3)
        self.check_slurm_got_args(proc_mock, 'compute63')

    def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
        # Factory called while the class body executes (hence no ``self``).
        # The returned function makes the mock report ``start_state`` first
        # and then delegates to check_success_after_reset with ``end_state``.
        def test(self, proc_mock):
            proc_mock.return_value = start_state
            self.check_success_after_reset(proc_mock, end_state)
        return test

    # Generate one test per initial state.  Writing into the class namespace
    # here means the tests exist before the class decorator is applied.
    for wait_state in ('alloc\n', 'drng\n', 'idle*\n'):
        locals()['test_wait_while_{}'.format(wait_state.strip())] = (
            make_wait_state_test(start_state=wait_state))

    # Generate one test per final state passed to check_success_after_reset.
    for end_state in ('down\n', 'down*\n', 'drain\n', 'fail\n'):
        locals()['test_wait_until_{}'.format(end_state.strip())] = (
            make_wait_state_test(end_state=end_state))

    def test_retry_failed_slurm_calls(self, proc_mock):
        # Every call raises CalledProcessError until
        # check_success_after_reset clears the side effect.
        failure = subprocess.CalledProcessError(1, ["mock"])
        proc_mock.side_effect = failure
        self.check_success_after_reset(proc_mock)

    def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
        # Covers a node that failed to bootstrap: no make_mocks() call
        # supplies an Arvados node, and the subprocess mock must stay unused.
        proc_mock.return_value = 'idle\n'
        self.make_actor(start_time=0)
        self.check_success_flag(True)
        self.assertFalse(proc_mock.called)

    def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
        # The mock yields 'drng' then 'idle'; the latest command must carry
        # the NodeName/State=RESUME arguments.
        slurm_outputs = ['drng\n', 'idle\n']
        proc_mock.side_effect = iter(slurm_outputs)
        busy_node = testutil.arvados_node_mock(job_uuid=True)
        self.make_mocks(arvados_node=busy_node)
        self.make_actor()
        self.check_success_flag(False, 2)
        self.check_slurm_got_args(
            proc_mock, 'NodeName=compute99', 'State=RESUME')

    def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
        # Only a single 'alloc' output is queued; the latest command must be
        # the sinfo state query for compute99.
        slurm_outputs = ['alloc\n']
        proc_mock.side_effect = iter(slurm_outputs)
        busy_node = testutil.arvados_node_mock(job_uuid=True)
        self.make_mocks(arvados_node=busy_node)
        self.make_actor()
        self.check_success_flag(False, 2)
        self.check_slurm_got_args(
            proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')

    def test_cancel_shutdown_retry(self, proc_mock):
        # OSError entries in the side-effect sequence are raised by the mock,
        # interleaved with the 'drain' and 'idle' outputs.
        call_results = [OSError, 'drain\n', OSError, 'idle\n']
        proc_mock.side_effect = iter(call_results)
        busy_node = testutil.arvados_node_mock(job_uuid=True)
        self.make_mocks(arvados_node=busy_node)
        self.make_actor()
        self.check_success_flag(False, 2)

    def test_issue_slurm_drain_retry(self, proc_mock):
        # Calls alternate between raising OSError and returning output until
        # check_success_after_reset clears the side effect.
        call_results = [OSError, '', OSError, 'drng\n']
        proc_mock.side_effect = iter(call_results)
        self.check_success_after_reset(proc_mock)

    def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
        # Run the inherited test with the mock returning 'drain'.
        proc_mock.return_value = 'drain\n'
        parent = super(SLURMComputeNodeShutdownActorTestCase, self)
        parent.test_arvados_node_cleaned_after_shutdown()