3 from __future__ import absolute_import, print_function
11 import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
12 from . import testutil
13 from .test_computenode_dispatch import ComputeNodeShutdownActorMixin
15 @mock.patch('subprocess.check_output')
16 class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
18 ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor
def check_slurm_got_args(self, proc_mock, *args):
    """Assert the patched subprocess call received every expected argument.

    proc_mock is the mock standing in for subprocess.check_output.  Each
    value in args must be found (via assertIn) in the command that was
    passed to the most recent call.
    """
    self.assertTrue(proc_mock.called)
    # call_args is (positional, keyword) for the latest call; the command
    # is its first positional argument.
    slurm_cmd = proc_mock.call_args[0][0]
    for expected in args:
        self.assertIn(expected, slurm_cmd)
def check_success_after_reset(self, proc_mock, end_state='drain\n'):
    """Shared scenario: check the success flag before and after a mock reset.

    With proc_mock left as the caller configured it,
    check_success_flag(None, 0) is run twice.  proc_mock is then switched
    to return end_state with no side effect, after which
    check_success_flag(True, 3) is run and the most recent SLURM command
    must contain 'compute63'.
    """
    node_record = testutil.arvados_node_mock(63)
    self.make_mocks(arvados_node=node_record)
    # NOTE(review): nothing visible here starts the actor before the first
    # check, whereas test_slurm_bypassed_when_no_arvados_node calls
    # self.make_actor() first -- confirm whether this helper needs it too.
    for _ in range(2):
        self.check_success_flag(None, 0)
    # Order is critical here: if the mock gets called when no return value
    # or side effect is set, we may invoke a real subprocess.
    proc_mock.return_value = end_state
    proc_mock.side_effect = None
    self.check_success_flag(True, 3)
    self.check_slurm_got_args(proc_mock, 'compute63')
def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
    """Build a test method for one start/end SLURM state pair.

    The generated test makes the patched subprocess call return
    start_state, then delegates to check_success_after_reset() with
    end_state.  The function is returned so the class body can bind it
    under a test_* name.
    """
    def test(self, proc_mock):
        proc_mock.return_value = start_state
        self.check_success_after_reset(proc_mock, end_state)
    return test
# Bind a test_wait_while_<state> method for each starting state.
for wait_state in ['alloc\n', 'drng\n', 'idle*\n']:
    locals()['test_wait_while_{}'.format(wait_state.strip())] = \
        make_wait_state_test(start_state=wait_state)

# Bind a test_wait_until_<state> method for each ending state.
for end_state in ['down\n', 'down*\n', 'drain\n', 'fail\n']:
    locals()['test_wait_until_{}'.format(end_state.strip())] = \
        make_wait_state_test(end_state=end_state)
def test_retry_failed_slurm_calls(self, proc_mock):
    # Every SLURM call raises CalledProcessError until the shared helper
    # clears the side effect.
    slurm_failure = subprocess.CalledProcessError(1, ["mock"])
    proc_mock.side_effect = slurm_failure
    self.check_success_after_reset(proc_mock)
def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
    # Test we correctly handle a node that failed to bootstrap.
    # The actor is made without an explicit Arvados node; the success flag
    # must be True and the patched subprocess call must never be used.
    proc_mock.return_value = 'idle\n'
    self.make_actor(start_time=0)
    self.check_success_flag(True)
    slurm_was_called = proc_mock.called
    self.assertFalse(slurm_was_called)
def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
    # Successive SLURM calls answer 'drng' and then 'idle'.
    slurm_replies = ['drng\n', 'idle\n']
    proc_mock.side_effect = iter(slurm_replies)
    node_with_job = testutil.arvados_node_mock(job_uuid=True)
    self.make_mocks(arvados_node=node_with_job)
    # NOTE(review): nothing visible here starts the actor before the check
    # below; other tests call self.make_actor() -- confirm whether one is
    # needed at this point.
    self.check_success_flag(False, 2)
    # The most recent SLURM command must be the one resuming compute99.
    self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
    # SLURM answers exactly once, reporting the node as 'alloc'.
    slurm_replies = ['alloc\n']
    proc_mock.side_effect = iter(slurm_replies)
    node_with_job = testutil.arvados_node_mock(job_uuid=True)
    self.make_mocks(arvados_node=node_with_job)
    # NOTE(review): nothing visible here starts the actor before the check
    # below; other tests call self.make_actor() -- confirm whether one is
    # needed at this point.
    self.check_success_flag(False, 2)
    # The most recent SLURM command must be the sinfo state query.
    expected_words = ('sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
    self.check_slurm_got_args(proc_mock, *expected_words)
def test_cancel_shutdown_retry(self, proc_mock):
    # SLURM calls alternate between raising OSError and returning a state.
    outcomes = [OSError, 'drain\n', OSError, 'idle\n']
    proc_mock.side_effect = iter(outcomes)
    node_with_job = testutil.arvados_node_mock(job_uuid=True)
    self.make_mocks(arvados_node=node_with_job)
    # NOTE(review): nothing visible here starts the actor before the check
    # below; other tests call self.make_actor() -- confirm whether one is
    # needed at this point.
    self.check_success_flag(False, 2)
def test_issue_slurm_drain_retry(self, proc_mock):
    # OSError failures are interleaved with SLURM output before the shared
    # helper reconfigures the mock.
    outcomes = [OSError, '', OSError, 'drng\n']
    proc_mock.side_effect = iter(outcomes)
    self.check_success_after_reset(proc_mock)
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
    # Run the inherited test with SLURM reporting 'drain'.  The inherited
    # method is invoked without the proc_mock argument.
    proc_mock.return_value = 'drain\n'
    inherited = super(SLURMComputeNodeShutdownActorTestCase, self)
    inherited.test_arvados_node_cleaned_after_shutdown()
92 class SLURMComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
def make_mocks(self, node_num):
    """Create the collaborator mocks that make_actor() hands to the actor.

    node_num is passed to testutil.cloud_node_mock() to build the cloud
    node.  The shutdown timer is set with _set_state(False, 300) and the
    cloud client's broken() is stubbed to return False.
    """
    shutdown_timer = testutil.MockShutdownTimer()
    shutdown_timer._set_state(False, 300)
    self.shutdowns = shutdown_timer
    self.timer = mock.MagicMock(name='timer_mock')
    self.updates = mock.MagicMock(name='update_mock')
    self.cloud_mock = testutil.cloud_node_mock(node_num)
    self.subscriber = mock.Mock(name='subscriber_mock')
    cloud_client = mock.MagicMock(name='cloud_client')
    cloud_client.broken.return_value = False
    self.cloud_client = cloud_client
def make_actor(self, node_num=1, arv_node=None, start_time=None):
    """Start a SLURM ComputeNodeMonitorActor and subscribe the mock subscriber.

    Mocks are created through make_mocks(node_num) unless self.cloud_mock
    already exists.  start_time defaults to the current time.  The actor's
    proxy is stored on self.node_actor.
    """
    mocks_ready = hasattr(self, 'cloud_mock')
    if not mocks_ready:
        self.make_mocks(node_num)
    started_at = time.time() if start_time is None else start_time
    actor_ref = slurm_dispatch.ComputeNodeMonitorActor.start(
        self.cloud_mock, started_at, self.shutdowns,
        testutil.cloud_node_fqdn, self.timer, self.updates,
        self.cloud_client, arv_node, boot_fail_after=300)
    self.node_actor = actor_ref.proxy()
    # Wait for the subscription to be processed before returning.
    self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
@mock.patch("subprocess.check_output")
def test_resume_node(self, check_output):
    # With SLURM reporting 'drain', resume_node() must issue both the sinfo
    # state query and the scontrol RESUME update.
    arv_node = testutil.arvados_node_mock()
    self.make_actor(arv_node=arv_node)
    check_output.return_value = "drain\n"
    self.node_actor.resume_node().get(self.TIMEOUT)
    hostname = arv_node['hostname']
    sinfo_call = mock.call(['sinfo', '--noheader', '-o', '%t', '-n', hostname])
    resume_call = mock.call(
        ['scontrol', 'update', 'NodeName=' + hostname, 'State=RESUME'])
    recorded = check_output.call_args_list
    self.assertIn(sinfo_call, recorded)
    self.assertIn(resume_call, recorded)
@mock.patch("subprocess.check_output")
def test_no_resume_idle_node(self, check_output):
    # With SLURM reporting 'idle', resume_node() must query the state with
    # sinfo but must not issue the scontrol RESUME update.
    arv_node = testutil.arvados_node_mock()
    self.make_actor(arv_node=arv_node)
    check_output.return_value = "idle\n"
    self.node_actor.resume_node().get(self.TIMEOUT)
    hostname = arv_node['hostname']
    sinfo_call = mock.call(['sinfo', '--noheader', '-o', '%t', '-n', hostname])
    resume_call = mock.call(
        ['scontrol', 'update', 'NodeName=' + hostname, 'State=RESUME'])
    recorded = check_output.call_args_list
    self.assertIn(sinfo_call, recorded)
    self.assertNotIn(resume_call, recorded)
@mock.patch("subprocess.check_output")
def test_resume_node_exception(self, check_output):
    # When the patched subprocess call raises, resume_node() must still
    # complete; the sinfo query is recorded and no scontrol RESUME update
    # is issued.
    arv_node = testutil.arvados_node_mock()
    self.make_actor(arv_node=arv_node)
    check_output.side_effect = Exception()
    self.node_actor.resume_node().get(self.TIMEOUT)
    hostname = arv_node['hostname']
    sinfo_call = mock.call(['sinfo', '--noheader', '-o', '%t', '-n', hostname])
    resume_call = mock.call(
        ['scontrol', 'update', 'NodeName=' + hostname, 'State=RESUME'])
    recorded = check_output.call_args_list
    self.assertIn(sinfo_call, recorded)
    self.assertNotIn(resume_call, recorded)
@mock.patch("subprocess.check_output")
def test_shutdown_down_node(self, check_output):
    # With SLURM reporting 'down', shutdown_eligible() must answer exactly
    # True.
    check_output.return_value = "down\n"
    arv_node = testutil.arvados_node_mock()
    self.make_actor(arv_node=arv_node)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertIs(True, eligible)
@mock.patch("subprocess.check_output")
def test_no_shutdown_drain_node(self, check_output):
    # With SLURM reporting 'drain', shutdown_eligible() must answer with
    # the reason string, not True.
    check_output.return_value = "drain\n"
    self.make_actor(arv_node=testutil.arvados_node_mock())
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest releases no longer provide.
    self.assertEqual('node is draining', self.node_actor.shutdown_eligible().get(self.TIMEOUT))