From 2a94b125b93a3aba204f55c37ecdc2876d81d642 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 7 Oct 2015 10:38:46 -0400 Subject: [PATCH] 6142: Only resume from 'drng' or 'drain'. Add/fix tests. --- .../arvnodeman/computenode/dispatch/slurm.py | 19 ++++++------------- .../tests/test_computenode_dispatch_slurm.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py index bb397fa277..dfb26bc303 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py @@ -37,20 +37,13 @@ class ComputeNodeShutdownActor(ShutdownActorBase): @ShutdownActorBase._retry((subprocess.CalledProcessError,)) def cancel_shutdown(self): if self._nodename: - try: + if self._get_slurm_state() in self.SLURM_DRAIN_STATES: + # Resume from "drng" or "drain" self._set_node_state('RESUME') - except subprocess.CalledProcessError: - slum_state = self._get_slurm_state() - if slum_state in self.SLURM_DRAIN_STATES: - # We expect to be able to resume from "drain" or "drng" - # So if scontrol exited non-zero, something actually failed, so - # raise an exception to signal the retry to kick in. - raise - else: - # Assume scontrol exited non-zero because the node is already in - # 'idle' or 'alloc' (so it never started draining) - # we don't need to do anything else resume it. - pass + else: + # Node is in a state such as 'idle' or 'alloc' so don't + # try to resume it because that will just raise an error. + pass return super(ComputeNodeShutdownActor, self).cancel_shutdown() @ShutdownActorBase._stop_if_window_closed diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py index c5097a717c..2ddf7676c8 100644 --- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py +++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py @@ -60,12 +60,19 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin, self.assertFalse(proc_mock.called) def test_node_undrained_when_shutdown_window_closes(self, proc_mock): - proc_mock.return_value = 'alloc\n' + proc_mock.side_effect = iter(['drng\n', 'idle\n']) self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True)) self.make_actor() self.check_success_flag(False, 2) - self.check_slurm_got_args(proc_mock, 'NodeName=compute99', - 'State=RESUME') + self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME') + + def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock): + proc_mock.side_effect = iter(['alloc\n']) + self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True)) + self.make_actor() + self.check_success_flag(False, 2) + self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99') + def test_arvados_node_cleaned_after_shutdown(self, proc_mock): proc_mock.return_value = 'drain\n' -- 2.39.5