Merge branch 'master' of git.curoverse.com:arvados into 3408-production-datamanager
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch.py
index ece186bcb3bbe83e7096d7bb6853312a0c9922fb..a1dfde30e1c90eeaeeb04e2542bd53866d4eff5f 100644 (file)
@@ -14,7 +14,7 @@ import arvnodeman.computenode.dispatch as dispatch
 from . import testutil
 
 class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
-    def make_mocks(self, arvados_effect=None, cloud_effect=None):
+    def make_mocks(self, arvados_effect=None):
         if arvados_effect is None:
             arvados_effect = [testutil.arvados_node_mock()]
         self.arvados_effect = arvados_effect
@@ -48,14 +48,33 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.assertEqual(self.cloud_client.create_node(),
                          self.setup_actor.cloud_node.get(self.TIMEOUT))
 
-    def test_failed_calls_retried(self):
+    def test_failed_arvados_calls_retried(self):
         self.make_mocks([
                 arverror.ApiError(httplib2.Response({'status': '500'}), ""),
                 testutil.arvados_node_mock(),
                 ])
         self.make_actor()
+        self.wait_for_assignment(self.setup_actor, 'arvados_node')
+
+    def test_failed_cloud_calls_retried(self):
+        self.make_mocks()
+        self.cloud_client.create_node.side_effect = [
+            Exception("test cloud creation error"),
+            self.cloud_client.create_node.return_value,
+            ]
+        self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
 
+    def test_failed_post_create_retried(self):
+        self.make_mocks()
+        self.cloud_client.post_create_node.side_effect = [
+            Exception("test cloud post-create error"), None]
+        self.make_actor()
+        done = self.FUTURE_CLASS()
+        self.setup_actor.subscribe(done.set)
+        done.get(self.TIMEOUT)
+        self.assertEqual(2, self.cloud_client.post_create_node.call_count)
+
     def test_stop_when_no_cloud_node(self):
         self.make_mocks(
             arverror.ApiError(httplib2.Response({'status': '500'}), ""))
@@ -93,32 +112,78 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
                          subscriber.call_args[0][0].actor_ref.actor_urn)
 
 
-class ComputeNodeShutdownActorTestCase(testutil.ActorTestMixin,
-                                       unittest.TestCase):
-    def make_mocks(self, cloud_node=None):
+class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
+    def make_mocks(self, cloud_node=None, arvados_node=None,
+                   shutdown_open=True):
         self.timer = testutil.MockTimer()
+        self.shutdowns = testutil.MockShutdownTimer()
+        self.shutdowns._set_state(shutdown_open, 300)
         self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.updates = mock.MagicMock(name='update_mock')
         if cloud_node is None:
             cloud_node = testutil.cloud_node_mock()
         self.cloud_node = cloud_node
+        self.arvados_node = arvados_node
 
-    def make_actor(self, arv_node=None):
+    def make_actor(self, cancellable=True):
         if not hasattr(self, 'timer'):
             self.make_mocks()
-        self.shutdown_actor = dispatch.ComputeNodeShutdownActor.start(
-            self.timer, self.cloud_client, self.cloud_node).proxy()
+        monitor_actor = dispatch.ComputeNodeMonitorActor.start(
+            self.cloud_node, time.time(), self.shutdowns, self.timer,
+            self.updates, self.arvados_node)
+        self.shutdown_actor = self.ACTOR_CLASS.start(
+            self.timer, self.cloud_client, monitor_actor, cancellable).proxy()
+        self.monitor_actor = monitor_actor.proxy()
+
+    def check_success_flag(self, expected, allow_msg_count=1):
+        # allow_msg_count is the number of internal messages that may
+        # need to be handled for shutdown to finish.
+        for try_num in range(1 + allow_msg_count):
+            last_flag = self.shutdown_actor.success.get(self.TIMEOUT)
+            if last_flag is expected:
+                break
+        else:
+            self.fail("success flag {} is not {}".format(last_flag, expected))
+
+    def test_uncancellable_shutdown(self, *mocks):
+        self.make_mocks(shutdown_open=False)
+        self.cloud_client.destroy_node.return_value = False
+        self.make_actor(cancellable=False)
+        self.check_success_flag(None, 0)
+        self.shutdowns._set_state(True, 600)
+        self.cloud_client.destroy_node.return_value = True
+        self.check_success_flag(True)
+
+
+class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
+                                       unittest.TestCase):
+    ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
 
     def test_easy_shutdown(self):
         self.make_actor()
-        self.shutdown_actor.cloud_node.get(self.TIMEOUT)
-        self.stop_proxy(self.shutdown_actor)
+        self.check_success_flag(True)
         self.assertTrue(self.cloud_client.destroy_node.called)
 
+    def test_shutdown_cancelled_when_window_closes(self):
+        self.make_mocks(shutdown_open=False)
+        self.make_actor()
+        self.check_success_flag(False, 2)
+        self.assertFalse(self.cloud_client.destroy_node.called)
+
+    def test_shutdown_retries_when_cloud_fails(self):
+        self.make_mocks()
+        self.cloud_client.destroy_node.return_value = False
+        self.make_actor()
+        self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
+        self.cloud_client.destroy_node.return_value = True
+        self.check_success_flag(True)
+
     def test_late_subscribe(self):
         self.make_actor()
         subscriber = mock.Mock(name='subscriber_mock')
         self.shutdown_actor.subscribe(subscriber).get(self.TIMEOUT)
         self.stop_proxy(self.shutdown_actor)
+        self.assertTrue(subscriber.called)
         self.assertEqual(self.shutdown_actor.actor_ref.actor_urn,
                          subscriber.call_args[0][0].actor_ref.actor_urn)
 
@@ -139,14 +204,8 @@ class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
 
 class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
                                       unittest.TestCase):
-    class MockShutdownTimer(object):
-        def _set_state(self, is_open, next_opening):
-            self.window_open = lambda: is_open
-            self.next_opening = lambda: next_opening
-
-
     def make_mocks(self, node_num):
-        self.shutdowns = self.MockShutdownTimer()
+        self.shutdowns = testutil.MockShutdownTimer()
         self.shutdowns._set_state(False, 300)
         self.timer = mock.MagicMock(name='timer_mock')
         self.updates = mock.MagicMock(name='update_mock')
@@ -196,6 +255,16 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertTrue(self.timer.schedule.called)
         self.assertEqual(300, self.timer.schedule.call_args[0][0])
 
+    def test_shutdown_window_close_scheduling(self):
+        self.make_actor()
+        self.shutdowns._set_state(False, 600)
+        self.timer.schedule.reset_mock()
+        self.node_actor.consider_shutdown().get(self.TIMEOUT)
+        self.stop_proxy(self.node_actor)
+        self.assertTrue(self.timer.schedule.called)
+        self.assertEqual(600, self.timer.schedule.call_args[0][0])
+        self.assertFalse(self.subscriber.called)
+
     def test_shutdown_subscription(self):
         self.make_actor()
         self.shutdowns._set_state(True, 600)
@@ -207,41 +276,31 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
     def test_shutdown_without_arvados_node(self):
         self.make_actor()
         self.shutdowns._set_state(True, 600)
-        self.node_actor.consider_shutdown().get(self.TIMEOUT)
-        self.assertTrue(self.subscriber.called)
+        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_without_arvados_node_and_old_cloud_node(self):
         self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
-        self.node_actor.consider_shutdown().get(self.TIMEOUT)
-        self.assertFalse(self.subscriber.called)
-
-    def check_shutdown_rescheduled(self, window_open, next_window,
-                                   schedule_time=None):
-        self.shutdowns._set_state(window_open, next_window)
-        self.timer.schedule.reset_mock()
-        self.node_actor.consider_shutdown().get(self.TIMEOUT)
-        self.stop_proxy(self.node_actor)
-        self.assertTrue(self.timer.schedule.called)
-        if schedule_time is not None:
-            self.assertEqual(schedule_time, self.timer.schedule.call_args[0][0])
-        self.assertFalse(self.subscriber.called)
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
-    def test_shutdown_window_close_scheduling(self):
-        self.make_actor()
-        self.check_shutdown_rescheduled(False, 600, 600)
+    def test_no_shutdown_when_window_closed(self):
+        self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_when_node_running_job(self):
         self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
-        self.check_shutdown_rescheduled(True, 600)
+        self.shutdowns._set_state(True, 600)
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_when_node_state_unknown(self):
         self.make_actor(5, testutil.arvados_node_mock(5, info={}))
-        self.check_shutdown_rescheduled(True, 600)
+        self.shutdowns._set_state(True, 600)
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_when_node_state_stale(self):
         self.make_actor(6, testutil.arvados_node_mock(6, age=90000))
-        self.check_shutdown_rescheduled(True, 600)
+        self.shutdowns._set_state(True, 600)
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_arvados_node_match(self):
         self.make_actor(2)