8816: Use is_cloud_exception to determine if exception is a "cloud error". Add
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch_slurm.py
index ccac8b2449b1c54abbda22e4323256ec3de6834a..8648783bac5889f11a328af3b277bd1f21da5665 100644 (file)
@@ -22,21 +22,31 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         for s in args:
             self.assertIn(s, slurm_cmd)
 
-    def check_success_after_reset(self, proc_mock):
+    def check_success_after_reset(self, proc_mock, end_state='drain\n'):
         self.make_mocks(arvados_node=testutil.arvados_node_mock(63))
         self.make_actor()
         self.check_success_flag(None, 0)
         self.check_success_flag(None, 0)
         # Order is critical here: if the mock gets called when no return value
         # or side effect is set, we may invoke a real subprocess.
-        proc_mock.return_value = 'drain\n'
+        proc_mock.return_value = end_state
         proc_mock.side_effect = None
         self.check_success_flag(True, 3)
         self.check_slurm_got_args(proc_mock, 'compute63')
 
-    def test_wait_for_drained_state(self, proc_mock):
-        proc_mock.return_value = 'drng\n'
-        self.check_success_after_reset(proc_mock)
+    def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
+        def test(self, proc_mock):
+            proc_mock.return_value = start_state
+            self.check_success_after_reset(proc_mock, end_state)
+        return test
+
+    for wait_state in ['alloc\n', 'drng\n', 'idle*\n']:
+        locals()['test_wait_while_' + wait_state.strip()
+                 ] = make_wait_state_test(start_state=wait_state)
+
+    for end_state in ['down\n', 'down*\n', 'drain\n', 'fail\n']:
+        locals()['test_wait_until_' + end_state.strip()
+                 ] = make_wait_state_test(end_state=end_state)
 
     def test_retry_failed_slurm_calls(self, proc_mock):
         proc_mock.side_effect = subprocess.CalledProcessError(1, ["mock"])
@@ -45,14 +55,35 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
     def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
         # Test we correctly handle a node that failed to bootstrap.
         proc_mock.return_value = 'idle\n'
-        self.make_actor()
+        self.make_actor(start_time=0)
         self.check_success_flag(True)
         self.assertFalse(proc_mock.called)
 
     def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
-        proc_mock.return_value = 'alloc\n'
+        proc_mock.side_effect = iter(['drng\n', 'idle\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+        self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
+
+    def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
+        proc_mock.side_effect = iter(['alloc\n'])
         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
         self.make_actor()
         self.check_success_flag(False, 2)
-        self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
-                                  'State=RESUME')
+        self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+
+    def test_cancel_shutdown_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+
+    def test_issue_slurm_drain_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        self.check_success_after_reset(proc_mock)
+
+    def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
+        proc_mock.return_value = 'drain\n'
+        super(SLURMComputeNodeShutdownActorTestCase,
+              self).test_arvados_node_cleaned_after_shutdown()