X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/22381d218295075e63c06f0b60bcb24cee7b30b6..6be95f5c3a2fcbe6321bba52c20393060e33e637:/services/nodemanager/tests/test_computenode_dispatch.py diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py index 3a1c8bad91..8d69ea958c 100644 --- a/services/nodemanager/tests/test_computenode_dispatch.py +++ b/services/nodemanager/tests/test_computenode_dispatch.py @@ -14,7 +14,7 @@ import arvnodeman.computenode.dispatch as dispatch from . import testutil class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): - def make_mocks(self, arvados_effect=None, cloud_effect=None): + def make_mocks(self, arvados_effect=None): if arvados_effect is None: arvados_effect = [testutil.arvados_node_mock()] self.arvados_effect = arvados_effect @@ -48,14 +48,33 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): self.assertEqual(self.cloud_client.create_node(), self.setup_actor.cloud_node.get(self.TIMEOUT)) - def test_failed_calls_retried(self): + def test_failed_arvados_calls_retried(self): self.make_mocks([ arverror.ApiError(httplib2.Response({'status': '500'}), ""), testutil.arvados_node_mock(), ]) self.make_actor() + self.wait_for_assignment(self.setup_actor, 'arvados_node') + + def test_failed_cloud_calls_retried(self): + self.make_mocks() + self.cloud_client.create_node.side_effect = [ + Exception("test cloud creation error"), + self.cloud_client.create_node.return_value, + ] + self.make_actor() self.wait_for_assignment(self.setup_actor, 'cloud_node') + def test_failed_post_create_retried(self): + self.make_mocks() + self.cloud_client.post_create_node.side_effect = [ + Exception("test cloud post-create error"), None] + self.make_actor() + done = self.FUTURE_CLASS() + self.setup_actor.subscribe(done.set) + done.get(self.TIMEOUT) + self.assertEqual(2, self.cloud_client.post_create_node.call_count) + def test_stop_when_no_cloud_node(self): self.make_mocks( arverror.ApiError(httplib2.Response({'status': '500'}), "")) @@ -93,32 +112,78 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): subscriber.call_args[0][0].actor_ref.actor_urn) -class ComputeNodeShutdownActorTestCase(testutil.ActorTestMixin, - unittest.TestCase): - def make_mocks(self, cloud_node=None): +class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin): + def make_mocks(self, cloud_node=None, arvados_node=None, + shutdown_open=True): self.timer = testutil.MockTimer() + self.shutdowns = testutil.MockShutdownTimer() + self.shutdowns._set_state(shutdown_open, 300) self.cloud_client = mock.MagicMock(name='cloud_client') + self.updates = mock.MagicMock(name='update_mock') if cloud_node is None: cloud_node = testutil.cloud_node_mock() self.cloud_node = cloud_node + self.arvados_node = arvados_node - def make_actor(self, arv_node=None): + def make_actor(self, cancellable=True): if not hasattr(self, 'timer'): self.make_mocks() - self.shutdown_actor = dispatch.ComputeNodeShutdownActor.start( - self.timer, self.cloud_client, self.cloud_node).proxy() + monitor_actor = dispatch.ComputeNodeMonitorActor.start( + self.cloud_node, time.time(), self.shutdowns, self.timer, + self.updates, self.arvados_node) + self.shutdown_actor = self.ACTOR_CLASS.start( + self.timer, self.cloud_client, monitor_actor, cancellable).proxy() + self.monitor_actor = monitor_actor.proxy() + + def check_success_flag(self, expected, allow_msg_count=1): + # allow_msg_count is the number of internal messages that may + # need to be handled for shutdown to finish. + for try_num in range(1 + allow_msg_count): + last_flag = self.shutdown_actor.success.get(self.TIMEOUT) + if last_flag is expected: + break + else: + self.fail("success flag {} is not {}".format(last_flag, expected)) + + def test_uncancellable_shutdown(self, *mocks): + self.make_mocks(shutdown_open=False) + self.cloud_client.destroy_node.return_value = False + self.make_actor(cancellable=False) + self.check_success_flag(None, 0) + self.shutdowns._set_state(True, 600) + self.cloud_client.destroy_node.return_value = True + self.check_success_flag(True) + + +class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin, + unittest.TestCase): + ACTOR_CLASS = dispatch.ComputeNodeShutdownActor def test_easy_shutdown(self): self.make_actor() - self.shutdown_actor.cloud_node.get(self.TIMEOUT) - self.stop_proxy(self.shutdown_actor) + self.check_success_flag(True) self.assertTrue(self.cloud_client.destroy_node.called) + def test_shutdown_cancelled_when_window_closes(self): + self.make_mocks(shutdown_open=False) + self.make_actor() + self.check_success_flag(False, 2) + self.assertFalse(self.cloud_client.destroy_node.called) + + def test_shutdown_retries_when_cloud_fails(self): + self.make_mocks() + self.cloud_client.destroy_node.return_value = False + self.make_actor() + self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT)) + self.cloud_client.destroy_node.return_value = True + self.check_success_flag(True) + def test_late_subscribe(self): self.make_actor() subscriber = mock.Mock(name='subscriber_mock') self.shutdown_actor.subscribe(subscriber).get(self.TIMEOUT) self.stop_proxy(self.shutdown_actor) + self.assertTrue(subscriber.called) self.assertEqual(self.shutdown_actor.actor_ref.actor_urn, subscriber.call_args[0][0].actor_ref.actor_urn) @@ -139,14 +204,8 @@ class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin, class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, unittest.TestCase): - class MockShutdownTimer(object): - def _set_state(self, is_open, next_opening): - self.window_open = lambda: is_open - self.next_opening = lambda: next_opening - - def make_mocks(self, node_num): - self.shutdowns = self.MockShutdownTimer() + self.shutdowns = testutil.MockShutdownTimer() self.shutdowns._set_state(False, 300) self.timer = mock.MagicMock(name='timer_mock') self.updates = mock.MagicMock(name='update_mock') @@ -168,28 +227,29 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, def test_in_state_when_unpaired(self): self.make_actor() - self.assertIsNone(self.node_state('idle', 'alloc')) + self.assertIsNone(self.node_state('idle', 'busy')) def test_in_state_when_pairing_stale(self): self.make_actor(arv_node=testutil.arvados_node_mock( job_uuid=None, age=90000)) - self.assertIsNone(self.node_state('idle', 'alloc')) + self.assertIsNone(self.node_state('idle', 'busy')) def test_in_state_when_no_state_available(self): - self.make_actor(arv_node=testutil.arvados_node_mock(info={})) - self.assertIsNone(self.node_state('idle', 'alloc')) + self.make_actor(arv_node=testutil.arvados_node_mock( + crunch_worker_state=None)) + self.assertIsNone(self.node_state('idle', 'busy')) def test_in_idle_state(self): self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None)) self.assertTrue(self.node_state('idle')) - self.assertFalse(self.node_state('alloc')) - self.assertTrue(self.node_state('idle', 'alloc')) + self.assertFalse(self.node_state('busy')) + self.assertTrue(self.node_state('idle', 'busy')) - def test_in_alloc_state(self): + def test_in_busy_state(self): self.make_actor(3, arv_node=testutil.arvados_node_mock(job_uuid=True)) self.assertFalse(self.node_state('idle')) - self.assertTrue(self.node_state('alloc')) - self.assertTrue(self.node_state('idle', 'alloc')) + self.assertTrue(self.node_state('busy')) + self.assertTrue(self.node_state('idle', 'busy')) def test_init_shutdown_scheduling(self): self.make_actor() @@ -234,7 +294,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_no_shutdown_when_node_state_unknown(self): - self.make_actor(5, testutil.arvados_node_mock(5, info={})) + self.make_actor(5, testutil.arvados_node_mock( + 5, crunch_worker_state=None)) self.shutdowns._set_state(True, 600) self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT)) @@ -258,6 +319,13 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.assertIsNone( self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)) + def test_arvados_node_mismatch_first_ping_too_early(self): + self.make_actor(4) + arv_node = testutil.arvados_node_mock( + 4, first_ping_at='1971-03-02T14:15:16.1717282Z') + self.assertIsNone( + self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)) + def test_update_cloud_node(self): self.make_actor(1) self.make_mocks(2)