2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: AGPL-3.0
6 from __future__ import absolute_import, print_function
14 import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
15 from . import testutil
16 from .test_computenode_dispatch import ComputeNodeShutdownActorMixin, ComputeNodeUpdateActorTestCase
# Shutdown-actor tests for the SLURM dispatch module.
# The class decorator patches subprocess.check_output; the test methods below
# receive that patch as their proc_mock argument.
# ACTOR_CLASS names the SLURM-specific shutdown actor from slurm_dispatch
# (presumably consumed by the mixin's helpers -- TODO confirm in
# test_computenode_dispatch).
# NOTE(review): in this view the base-class list stops after the mixin and no
# closing parenthesis is visible -- confirm the remaining bases in the full file.
18 @mock.patch('subprocess.check_output')
19 class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
21 ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor
def check_slurm_got_args(self, proc_mock, *args):
    """Assert that the latest SLURM command contains every expected argument.

    proc_mock is the patched subprocess.check_output.  Each string in args
    must be an element of the command passed to its most recent call.
    """
    self.assertTrue(proc_mock.called)
    # call_args is (positional_args, keyword_args); the command is the
    # first positional argument of the most recent call.
    slurm_cmd = proc_mock.call_args[0][0]
    # Check each expected argument individually so a failure names the
    # one that is missing.
    for s in args:
        self.assertIn(s, slurm_cmd)
# Helper shared by several tests: run a shutdown for compute node 63 and
# verify that it is reported successful once the patched subprocess call
# returns end_state.
#   proc_mock -- the patched subprocess.check_output; callers set its initial
#                return_value or side_effect before calling this helper.
#   end_state -- value the mock returns from the reset point onwards.
#   timer     -- NOTE(review): never read in the lines visible here.
# NOTE(review): nothing visible in this view creates the actor or advances
# the timer between the checks, although the comments below speak of
# successive tries -- statements appear to be missing; confirm against the
# full file before changing this helper.
29 def check_success_after_reset(self, proc_mock, end_state='drain\n', timer=False):
30 self.make_mocks(arvados_node=testutil.arvados_node_mock(63))
# Install a MockTimer(False) (presumably one that fires only when told to --
# verify in testutil).
32 self.timer = testutil.MockTimer(False)
# No success verdict is expected yet (first argument None); see the mixin's
# check_success_flag for the meaning of the second argument.
34 self.check_success_flag(None, 0)
35 # At this point, 1st try should have happened.
38 self.check_success_flag(None, 0)
39 # At this point, 2nd try should have happened.
41 # Order is critical here: if the mock gets called when no return value
42 # or side effect is set, we may invoke a real subprocess.
43 proc_mock.return_value = end_state
44 proc_mock.side_effect = None
# The shutdown must now be reported as successful, and the last SLURM
# command must have targeted the node created above.
49 self.check_success_flag(True, 3)
50 self.check_slurm_got_args(proc_mock, 'NodeName=compute63')
def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
    """Build a test method for one start-state/end-state combination.

    The generated test makes the patched subprocess call report start_state
    first, then hands over to check_success_after_reset, which switches the
    mock to end_state and expects the shutdown to succeed.

    Returns the generated function so the caller can bind it as a test
    method; without the return, the generated test_wait_* attributes
    would be None and never run.
    """
    def test(self, proc_mock):
        proc_mock.return_value = start_state
        self.check_success_after_reset(proc_mock, end_state)
    return test
# Generate test_wait_while_<state>: the subprocess mock starts out reporting
# <state>, and the shutdown must still succeed once it reports the default
# end state.
for wait_state in ('alloc\n', 'drng\n'):
    locals()['test_wait_while_{}'.format(wait_state.strip())] = (
        make_wait_state_test(start_state=wait_state))

# Generate test_wait_until_<state>: starting from the default start state,
# each of these reported states must let the shutdown succeed.
for end_state in ('idle*\n', 'down\n', 'down*\n', 'drain\n', 'fail\n'):
    locals()['test_wait_until_{}'.format(end_state.strip())] = (
        make_wait_state_test(end_state=end_state))
def test_retry_failed_slurm_calls(self, proc_mock):
    # Every subprocess call raises CalledProcessError until the helper
    # resets the mock; the shutdown must still succeed afterwards.
    slurm_failure = subprocess.CalledProcessError(1, ["mock"])
    proc_mock.side_effect = slurm_failure
    self.check_success_after_reset(proc_mock)
def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
    # A node that failed to bootstrap must be shut down successfully
    # without any SLURM command being issued.
    proc_mock.return_value = 'down\n'
    self.make_actor(start_time=0)
    self.check_success_flag(True)
    slurm_was_called = proc_mock.called
    self.assertFalse(slurm_was_called)
# Cancelling a shutdown in progress must put the SLURM node back into
# service: the first command drains the node, the last one resumes it.
# NOTE(review): no statement visible in this view creates the shutdown actor
# before busywait is called, and the inner numbering has gaps -- statements
# appear to be missing; confirm against the full file.
77 def test_node_resumed_when_shutdown_cancelled(self, proc_mock):
# Scripted return values for successive subprocess calls.
79 proc_mock.side_effect = iter(['', 'drng\n', 'drng\n', ''])
80 self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
81 self.timer = testutil.MockTimer(False)
# Wait until the patched subprocess call has been invoked at least once,
# then cancel the shutdown.
83 self.busywait(lambda: proc_mock.call_args is not None)
84 self.shutdown_actor.cancel_shutdown("test")
# The shutdown must be reported as unsuccessful.
85 self.check_success_flag(False, 2)
# First command issued: drain the node.
86 self.assertEqual(proc_mock.call_args_list[0], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']))
# Last command issued: resume the node.
87 self.assertEqual(proc_mock.call_args_list[-1], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME']))
# Stop the actor explicitly at the end of the test.
90 self.shutdown_actor.actor_ref.stop()
# Shutdown of a node whose Arvados record has a job_uuid must end up
# unsuccessful even though some SLURM calls fail with OSError along the way.
# NOTE(review): no statement visible in this view creates the shutdown actor
# between make_mocks and the final check -- confirm against the full file.
92 def test_cancel_shutdown_retry(self, proc_mock):
# Scripted outcomes for successive subprocess calls: an exception class in
# the sequence is raised, a string is returned.
93 proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
94 self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
# Expect an unsuccessful result; see the mixin's check_success_flag for the
# meaning of the second argument.
96 self.check_success_flag(False, 5)
def test_issue_slurm_drain_retry(self, proc_mock):
    # Scripted outcomes for successive subprocess calls: two OSError
    # failures, then 'drng\n', then 'drain\n'.
    scripted_outcomes = [OSError, OSError, 'drng\n', 'drain\n']
    proc_mock.side_effect = iter(scripted_outcomes)
    self.check_success_after_reset(proc_mock, timer=False)
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
    # Run the inherited test with the SLURM query reporting 'drain\n'.
    proc_mock.return_value = 'drain\n'
    inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
    inherited.test_arvados_node_cleaned_after_shutdown()
def test_cancellable_shutdown(self, proc_mock):
    # Run the inherited test with the SLURM query reporting 'other\n'.
    proc_mock.return_value = 'other\n'
    inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
    inherited.test_cancellable_shutdown()
def test_uncancellable_shutdown(self, proc_mock):
    # Run the inherited test with the SLURM query reporting 'other\n'.
    proc_mock.return_value = 'other\n'
    inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
    inherited.test_uncancellable_shutdown()
# Update-actor tests for the SLURM dispatch module.  Reuses the generic
# ComputeNodeUpdateActorTestCase with the SLURM-specific actor class, and
# patches subprocess.check_output for the test methods.
117 @mock.patch('subprocess.check_output')
118 class SLURMComputeNodeUpdateActorTestCase(ComputeNodeUpdateActorTestCase):
119 ACTOR_CLASS = slurm_dispatch.ComputeNodeUpdateActor
# Syncing a node must issue an scontrol update that sets the SLURM node's
# Weight.
# NOTE(review): nothing visible in this view creates self.updater before it
# is used -- it may come from the base class or from a statement missing
# here; confirm against the full file.
121 def test_update_node_weight(self, check_output):
123 cloud_node = testutil.cloud_node_mock()
124 arv_node = testutil.arvados_node_mock()
# Block on the sync result for at most TIMEOUT before asserting.
125 self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
126 check_output.assert_called_with(['scontrol', 'update', 'NodeName=compute99', 'Weight=99000'])