X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2a13fcc84b240e368787c8e94ced95d75eee0cc4..bdb92619b5f6d920119b8c32c3027cf4b751ed16:/services/nodemanager/tests/test_computenode_dispatch.py diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py index 8bb0c50042..c3774c1b7a 100644 --- a/services/nodemanager/tests/test_computenode_dispatch.py +++ b/services/nodemanager/tests/test_computenode_dispatch.py @@ -11,7 +11,10 @@ import mock import pykka import threading +from libcloud.common.exceptions import BaseHTTPError + import arvnodeman.computenode.dispatch as dispatch +from arvnodeman.computenode.driver import BaseComputeNodeDriver from . import testutil class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): @@ -25,6 +28,7 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): self.api_client.nodes().update().execute.side_effect = arvados_effect self.cloud_client = mock.MagicMock(name='cloud_client') self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1) + self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception def make_actor(self, arv_node=None): if not hasattr(self, 'timer'): @@ -86,6 +90,28 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase): self.make_actor() self.wait_for_assignment(self.setup_actor, 'cloud_node') + def test_unknown_basehttperror_not_retried(self): + self.make_mocks() + self.cloud_client.create_node.side_effect = [ + BaseHTTPError(400, "Unknown"), + self.cloud_client.create_node.return_value, + ] + self.make_actor() + finished = threading.Event() + self.setup_actor.subscribe(lambda _: finished.set()) + assert(finished.wait(self.TIMEOUT)) + self.assertEqual(0, self.cloud_client.post_create_node.call_count) + + def test_known_basehttperror_retried(self): + self.make_mocks() + self.cloud_client.create_node.side_effect = [ + BaseHTTPError(400, "InstanceLimitExceeded"), + self.cloud_client.create_node.return_value, + ] + self.make_actor() + self.wait_for_assignment(self.setup_actor, 'cloud_node') + self.assertEqual(1, self.cloud_client.post_create_node.call_count) + def test_failed_post_create_retried(self): self.make_mocks() self.cloud_client.post_create_node.side_effect = [ @@ -300,17 +326,24 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, def test_in_state_when_unpaired(self): self.make_actor() - self.assertIsNone(self.node_state('idle', 'busy')) + self.assertTrue(self.node_state('unpaired')) def test_in_state_when_pairing_stale(self): self.make_actor(arv_node=testutil.arvados_node_mock( job_uuid=None, age=90000)) - self.assertIsNone(self.node_state('idle', 'busy')) + self.assertTrue(self.node_state('down')) def test_in_state_when_no_state_available(self): self.make_actor(arv_node=testutil.arvados_node_mock( crunch_worker_state=None)) - self.assertIsNone(self.node_state('idle', 'busy')) + print(self.node_actor.get_state().get()) + self.assertTrue(self.node_state('idle')) + + def test_in_state_when_no_state_available_old(self): + self.make_actor(arv_node=testutil.arvados_node_mock( + crunch_worker_state=None, age=90000)) + print(self.node_actor.get_state().get()) + self.assertTrue(self.node_state('down')) def test_in_idle_state(self): self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None)) @@ -351,12 +384,13 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.make_actor() self.shutdowns._set_state(True, 600) self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), - "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')") + (False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')")) def test_shutdown_without_arvados_node(self): self.make_actor(start_time=0) self.shutdowns._set_state(True, 600) - self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT)) + self.assertEquals((True, "node state is ('unpaired', 'open', 'boot exceeded', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_shutdown_missing(self): arv_node = testutil.arvados_node_mock(10, job_uuid=None, @@ -364,7 +398,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, last_ping_at='1970-01-01T01:02:03.04050607Z') self.make_actor(10, arv_node) self.shutdowns._set_state(True, 600) - self.assertIs(self.node_actor.shutdown_eligible().get(self.TIMEOUT), True) + self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_shutdown_running_broken(self): arv_node = testutil.arvados_node_mock(12, job_uuid=None, @@ -372,7 +407,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.make_actor(12, arv_node) self.shutdowns._set_state(True, 600) self.cloud_client.broken.return_value = True - self.assertIs(self.node_actor.shutdown_eligible().get(self.TIMEOUT), True) + self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_shutdown_missing_broken(self): arv_node = testutil.arvados_node_mock(11, job_uuid=None, @@ -381,31 +417,31 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin, self.make_actor(11, arv_node) self.shutdowns._set_state(True, 600) self.cloud_client.broken.return_value = True - self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT)) + self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')")) def test_no_shutdown_when_window_closed(self): self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None)) - self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), - "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')") + self.assertEquals((False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_no_shutdown_when_node_running_job(self): self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True)) self.shutdowns._set_state(True, 600) - self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), - "node state is ('busy', 'open', 'boot wait', 'idle exceeded')") + self.assertEquals((False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) - def test_no_shutdown_when_node_state_unknown(self): + def test_shutdown_when_node_state_unknown(self): self.make_actor(5, testutil.arvados_node_mock( 5, crunch_worker_state=None)) self.shutdowns._set_state(True, 600) - self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), - "node is paired but crunch_worker_state is 'None'") + self.assertEquals((True, "node state is ('idle', 'open', 'boot wait', 'idle exceeded')"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_no_shutdown_when_node_state_stale(self): self.make_actor(6, testutil.arvados_node_mock(6, age=90000)) self.shutdowns._set_state(True, 600) - self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), - "node state is stale") + self.assertEquals((False, "node state is stale"), + self.node_actor.shutdown_eligible().get(self.TIMEOUT)) def test_arvados_node_match(self): self.make_actor(2)