X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3280225de5f5f3325cd406d154fe9eeed9ce020a..0eb72b526bf8bbb011551ecf019f604e17a534f1:/services/nodemanager/tests/test_computenode_dispatch.py diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py index 95b1329fa6..a8aa2e38fb 100644 --- a/services/nodemanager/tests/test_computenode_dispatch.py +++ b/services/nodemanager/tests/test_computenode_dispatch.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 from __future__ import absolute_import, print_function @@ -11,7 +14,10 @@ import mock import pykka import threading +from libcloud.common.exceptions import BaseHTTPError + import arvnodeman.computenode.dispatch as dispatch +from arvnodeman.computenode.driver import BaseComputeNodeDriver from . import testutil class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): @@ -86,6 +92,28 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): self.make_actor() self.wait_for_assignment(self.setup_actor, 'cloud_node') + def test_basehttperror_retried(self): + self.make_mocks() + self.cloud_client.create_node.side_effect = [ + BaseHTTPError(500, "Try again"), + self.cloud_client.create_node.return_value, + ] + self.make_actor() + self.wait_for_assignment(self.setup_actor, 'cloud_node') + self.assertEqual(1, self.cloud_client.post_create_node.call_count) + + def test_instance_exceeded_not_retried(self): + self.make_mocks() + self.cloud_client.create_node.side_effect = [ + BaseHTTPError(400, "InstanceLimitExceeded"), + self.cloud_client.create_node.return_value, + ] + self.make_actor() + done = self.FUTURE_CLASS() + self.setup_actor.subscribe(done.set) + done.get(self.TIMEOUT) + self.assertEqual(0, self.cloud_client.post_create_node.call_count) + def test_failed_post_create_retried(self): self.make_mocks() self.cloud_client.post_create_node.side_effect = [ @@ -175,14 +203,19 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin): else: self.fail("success flag {} is not {}".format(last_flag, expected)) + def test_cancellable_shutdown(self, *mocks): + self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy")) + self.cloud_client.destroy_node.return_value = True + self.make_actor(cancellable=True) + self.check_success_flag(False) + self.assertFalse(self.cloud_client.destroy_node.called) + def test_uncancellable_shutdown(self, *mocks): - self.make_mocks(shutdown_open=False) - self.cloud_client.destroy_node.return_value = False - self.make_actor(cancellable=False) - self.check_success_flag(None, 0) - self.shutdowns._set_state(True, 600) + self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy")) self.cloud_client.destroy_node.return_value = True - self.check_success_flag(True) + self.make_actor(cancellable=False) + self.check_success_flag(True, 2) + self.assertTrue(self.cloud_client.destroy_node.called) def test_arvados_node_cleaned_after_shutdown(self, *mocks): cloud_node = testutil.cloud_node_mock(62) @@ -205,7 +238,9 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin): cloud_node = testutil.cloud_node_mock(61) arv_node = testutil.arvados_node_mock(61) self.make_mocks(cloud_node, arv_node, shutdown_open=False) + self.cloud_client.destroy_node.return_value = False self.make_actor(cancellable=True) + self.shutdown_actor.cancel_shutdown("test") self.check_success_flag(False, 2) self.assertFalse(self.arvados_client.nodes().update.called) @@ -219,29 +254,13 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin, self.check_success_flag(True) self.assertTrue(self.cloud_client.destroy_node.called) - def test_shutdown_cancelled_when_window_closes(self): - self.make_mocks(shutdown_open=False) - self.make_actor() - self.check_success_flag(False, 2) - self.assertFalse(self.cloud_client.destroy_node.called) - self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED, - self.shutdown_actor.cancel_reason.get(self.TIMEOUT)) - - def test_shutdown_retries_when_cloud_fails(self): - self.make_mocks() - self.cloud_client.destroy_node.return_value = False - self.make_actor(start_time=0) - self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT)) - self.cloud_client.destroy_node.return_value = True - self.check_success_flag(True) - - def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self): + def test_shutdown_cancelled_when_destroy_node_fails(self): self.make_mocks(node_broken=True) self.cloud_client.destroy_node.return_value = False self.make_actor(start_time=0) self.check_success_flag(False, 2) self.assertEqual(1, self.cloud_client.destroy_node.call_count) - self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN, + self.assertEqual(self.ACTOR_CLASS.DESTROY_FAILED, self.shutdown_actor.cancel_reason.get(self.TIMEOUT)) def test_late_subscribe(self): @@ -256,17 +275,30 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin, class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin, unittest.TestCase): + ACTOR_CLASS = dispatch.ComputeNodeUpdateActor + def make_actor(self): self.driver = mock.MagicMock(name='driver_mock') - self.updater = dispatch.ComputeNodeUpdateActor.start(self.driver).proxy() + self.timer = mock.MagicMock(name='timer_mock') + self.updater = self.ACTOR_CLASS.start(self.driver, self.timer).proxy() - def test_node_sync(self): + def test_node_sync(self, *args): self.make_actor() cloud_node = testutil.cloud_node_mock() arv_node = testutil.arvados_node_mock() self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT) self.driver().sync_node.assert_called_with(cloud_node, arv_node) + @testutil.no_sleep + def test_node_sync_error(self, *args): + self.make_actor() + cloud_node = testutil.cloud_node_mock() + arv_node = testutil.arvados_node_mock() + self.driver().sync_node.side_effect = (IOError, Exception, True) + self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT) + self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT) + self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT) + self.driver().sync_node.assert_called_with(cloud_node, arv_node) class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, unittest.TestCase): @@ -296,17 +328,24 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, def test_in_state_when_unpaired(self): self.make_actor() - self.assertIsNone(self.node_state('idle', 'busy')) + self.assertTrue(self.node_state('unpaired')) def test_in_state_when_pairing_stale(self): self.make_actor(arv_node=testutil.arvados_node_mock( job_uuid=None, age=90000)) - self.assertIsNone(self.node_state('idle', 'busy')) + self.assertTrue(self.node_state('down')) def test_in_state_when_no_state_available(self): self.make_actor(arv_node=testutil.arvados_node_mock( crunch_worker_state=None)) - self.assertIsNone(self.node_state('idle', 'busy')) + print(self.node_actor.get_state().get()) + self.assertTrue(self.node_state('idle')) + + def test_in_state_when_no_state_available_old(self): + self.make_actor(arv_node=testutil.arvados_node_mock( + crunch_worker_state=None, age=90000)) + print(self.node_actor.get_state().get()) + self.assertTrue(self.node_state('down')) def test_in_idle_state(self): self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None)) @@ -346,28 +385,32 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, def test_no_shutdown_booting(self): self.make_actor() self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is still booting")) + self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), + (False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')")) def test_shutdown_without_arvados_node(self): self.make_actor(start_time=0) self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT)) + self.assertEquals((True, "node state is ('unpaired', 'open', 'boot exceeded', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) - def test_no_shutdown_missing(self): + def test_shutdown_missing(self): arv_node = testutil.arvados_node_mock(10, job_uuid=None, crunch_worker_state="down", last_ping_at='1970-01-01T01:02:03.04050607Z') self.make_actor(10, arv_node) self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle.")) + self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) - def test_no_shutdown_running_broken(self): + def test_shutdown_running_broken(self): arv_node = testutil.arvados_node_mock(12, job_uuid=None, crunch_worker_state="down") self.make_actor(12, arv_node) self.shutdowns._set_state(True, 600) self.cloud_client.broken.return_value = True - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle.")) + self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_shutdown_missing_broken(self): arv_node = testutil.arvados_node_mock(11, job_uuid=None, @@ -376,27 +419,31 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.make_actor(11, arv_node) self.shutdowns._set_state(True, 600) self.cloud_client.broken.return_value = True - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT)) + self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')")) def test_no_shutdown_when_window_closed(self): self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None)) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("shutdown window is not open.")) + self.assertEquals((False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_no_shutdown_when_node_running_job(self): self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True)) self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle.")) + self.assertEquals((False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) - def test_no_shutdown_when_node_state_unknown(self): + def test_shutdown_when_node_state_unknown(self): self.make_actor(5, testutil.arvados_node_mock( 5, crunch_worker_state=None)) self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle.")) + self.assertEquals((True, "node state is ('idle', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_no_shutdown_when_node_state_stale(self): self.make_actor(6, testutil.arvados_node_mock(6, age=90000)) self.shutdowns._set_state(True, 600) - self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle.")) + self.assertEquals((False, "node state is stale"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_arvados_node_match(self): self.make_actor(2)