import httplib2
import mock
import pykka
+import threading
+
+from libcloud.common.exceptions import BaseHTTPError
import arvnodeman.computenode.dispatch as dispatch
+from arvnodeman.computenode.driver import BaseComputeNodeDriver
from . import testutil
class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
self.api_client.nodes().update().execute.side_effect = arvados_effect
self.cloud_client = mock.MagicMock(name='cloud_client')
self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
+ self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception
def make_actor(self, arv_node=None):
if not hasattr(self, 'timer'):
- self.make_mocks(arvados_effect=[arv_node])
+ self.make_mocks(arvados_effect=[arv_node] if arv_node else None)  # falsy arv_node: pass None instead of [arv_node]
self.setup_actor = dispatch.ComputeNodeSetupActor.start(
self.timer, self.api_client, self.cloud_client,
testutil.MockSize(1), arv_node).proxy()
+ def assert_node_properties_updated(self, uuid=None,
+ size=testutil.MockSize(1)):
+ """Assert that nodes().update was called with the cloud node size and price.
+
+ When uuid is falsy, the uuid of the last entry in self.arvados_effect
+ is the one expected in the call.
+ """
+ expected_uuid = uuid or self.arvados_effect[-1]['uuid']
+ cloud_node_props = {'size': size.id, 'price': size.price}
+ expected_body = {'properties': {'cloud_node': cloud_node_props}}
+ self.api_client.nodes().update.assert_any_call(
+ uuid=expected_uuid, body=expected_body)
+
def test_creation_without_arvados_node(self):
self.make_actor()
+ # Subscribe so the test can wait for the setup actor to notify it
+ # before counting API calls.
+ finished = threading.Event()
+ self.setup_actor.subscribe(lambda _: finished.set())
self.assertEqual(self.arvados_effect[-1],
self.setup_actor.arvados_node.get(self.TIMEOUT))
- self.assertTrue(self.api_client.nodes().create().execute.called)
+ # unittest assertion instead of a bare assert, which is stripped
+ # when Python runs with -O.
+ self.assertTrue(finished.wait(self.TIMEOUT))
+ self.assertEqual(1, self.api_client.nodes().create().execute.call_count)
+ self.assertEqual(1, self.api_client.nodes().update().execute.call_count)
+ self.assert_node_properties_updated()
self.assertEqual(self.cloud_client.create_node(),
self.setup_actor.cloud_node.get(self.TIMEOUT))
def test_creation_with_arvados_node(self):
+ # Two Arvados responses are queued; the test below expects two
+ # update().execute calls.
+ self.make_mocks(arvados_effect=[testutil.arvados_node_mock()]*2)
self.make_actor(testutil.arvados_node_mock())
+ finished = threading.Event()
+ self.setup_actor.subscribe(lambda _: finished.set())
self.assertEqual(self.arvados_effect[-1],
self.setup_actor.arvados_node.get(self.TIMEOUT))
- self.assertTrue(self.api_client.nodes().update().execute.called)
+ # unittest assertion instead of a bare assert, which is stripped
+ # when Python runs with -O.
+ self.assertTrue(finished.wait(self.TIMEOUT))
+ self.assert_node_properties_updated()
+ self.assertEqual(2, self.api_client.nodes().update().execute.call_count)
self.assertEqual(self.cloud_client.create_node(),
self.setup_actor.cloud_node.get(self.TIMEOUT))
self.make_actor()
self.wait_for_assignment(self.setup_actor, 'cloud_node')
+ def test_unknown_basehttperror_not_retried(self):
+ # create_node raises BaseHTTPError(400, "Unknown") on its first call;
+ # the test expects the actor to notify subscribers without ever
+ # calling post_create_node.
+ self.make_mocks()
+ self.cloud_client.create_node.side_effect = [
+ BaseHTTPError(400, "Unknown"),
+ self.cloud_client.create_node.return_value,
+ ]
+ self.make_actor()
+ finished = threading.Event()
+ self.setup_actor.subscribe(lambda _: finished.set())
+ # unittest assertion instead of a bare assert, which is stripped
+ # when Python runs with -O.
+ self.assertTrue(finished.wait(self.TIMEOUT))
+ self.assertEqual(0, self.cloud_client.post_create_node.call_count)
+
+ def test_known_basehttperror_retried(self):
+ # create_node raises BaseHTTPError(400, "InstanceLimitExceeded") on
+ # its first call and yields the mocked node on the second; the test
+ # expects a cloud node to be assigned and post_create_node called once.
+ self.make_mocks()
+ create_node = self.cloud_client.create_node
+ limit_error = BaseHTTPError(400, "InstanceLimitExceeded")
+ create_node.side_effect = [limit_error, create_node.return_value]
+ self.make_actor()
+ self.wait_for_assignment(self.setup_actor, 'cloud_node')
+ self.assertEqual(1, self.cloud_client.post_create_node.call_count)
+
def test_failed_post_create_retried(self):
self.make_mocks()
self.cloud_client.post_create_node.side_effect = [
self.make_actor()
subscriber = mock.Mock(name='subscriber_mock')
self.setup_actor.subscribe(subscriber)
- self.api_client.nodes().create().execute.side_effect = [
- testutil.arvados_node_mock()]
+ retry_resp = [testutil.arvados_node_mock()]
+ self.api_client.nodes().create().execute.side_effect = retry_resp
+ self.api_client.nodes().update().execute.side_effect = retry_resp
self.wait_for_assignment(self.setup_actor, 'cloud_node')
self.assertEqual(self.setup_actor.actor_ref.actor_urn,
subscriber.call_args[0][0].actor_ref.actor_urn)
class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
def make_mocks(self, cloud_node=None, arvados_node=None,
- shutdown_open=True):
+ shutdown_open=True, node_broken=False):
self.timer = testutil.MockTimer()
self.shutdowns = testutil.MockShutdownTimer()
self.shutdowns._set_state(shutdown_open, 300)
self.cloud_client = mock.MagicMock(name='cloud_client')
+ self.cloud_client.broken.return_value = node_broken
self.arvados_client = mock.MagicMock(name='arvados_client')
self.updates = mock.MagicMock(name='update_mock')
if cloud_node is None:
cloud_node = testutil.cloud_node_mock(61)
arv_node = testutil.arvados_node_mock(61)
self.make_mocks(cloud_node, arv_node, shutdown_open=False)
+ self.cloud_client.destroy_node.return_value = False
self.make_actor(cancellable=True)
+ self.shutdown_actor.cancel_shutdown("test")
self.check_success_flag(False, 2)
self.assertFalse(self.arvados_client.nodes().update.called)
self.check_success_flag(True)
self.assertTrue(self.cloud_client.destroy_node.called)
- def test_shutdown_cancelled_when_window_closes(self):
- self.make_mocks(shutdown_open=False)
- self.make_actor()
- self.check_success_flag(False, 2)
- self.assertFalse(self.cloud_client.destroy_node.called)
-
def test_shutdown_retries_when_cloud_fails(self):
self.make_mocks()
self.cloud_client.destroy_node.return_value = False
self.cloud_client.destroy_node.return_value = True
self.check_success_flag(True)
+ def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
+ # The cloud client reports the node as broken and destroy_node
+ # returns False; the test expects one destroy attempt and a
+ # NODE_BROKEN cancel reason.
+ self.make_mocks(node_broken=True)
+ destroy_node = self.cloud_client.destroy_node
+ destroy_node.return_value = False
+ self.make_actor(start_time=0)
+ self.check_success_flag(False, 2)
+ self.assertEqual(1, destroy_node.call_count)
+ cancel_reason = self.shutdown_actor.cancel_reason.get(self.TIMEOUT)
+ self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN, cancel_reason)
+
def test_late_subscribe(self):
self.make_actor()
subscriber = mock.Mock(name='subscriber_mock')
self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
self.driver().sync_node.assert_called_with(cloud_node, arv_node)
+ @testutil.no_sleep
+ def test_node_sync_error(self):
+ # The driver's sync_node is set to raise IOError, then Exception,
+ # then return True; each of the three updater calls is waited on
+ # with self.TIMEOUT.
+ self.make_actor()
+ cloud_node = testutil.cloud_node_mock()
+ arv_node = testutil.arvados_node_mock()
+ sync_outcomes = (IOError, Exception, True)
+ self.driver().sync_node.side_effect = sync_outcomes
+ first_sync = self.updater.sync_node(cloud_node, arv_node)
+ first_sync.get(self.TIMEOUT)
+ second_sync = self.updater.sync_node(cloud_node, arv_node)
+ second_sync.get(self.TIMEOUT)
+ third_sync = self.updater.sync_node(cloud_node, arv_node)
+ third_sync.get(self.TIMEOUT)
+ self.driver().sync_node.assert_called_with(cloud_node, arv_node)
class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
unittest.TestCase):
def test_in_state_when_unpaired(self):
self.make_actor()
- self.assertIsNone(self.node_state('idle', 'busy'))
+ self.assertTrue(self.node_state('unpaired'))  # actor was made without an arv_node argument
def test_in_state_when_pairing_stale(self):
self.make_actor(arv_node=testutil.arvados_node_mock(
job_uuid=None, age=90000))
- self.assertIsNone(self.node_state('idle', 'busy'))
+ self.assertTrue(self.node_state('down'))  # arv_node mock was built with age=90000
def test_in_state_when_no_state_available(self):
self.make_actor(arv_node=testutil.arvados_node_mock(
crunch_worker_state=None))
- self.assertIsNone(self.node_state('idle', 'busy'))
+ # With crunch_worker_state=None the test expects the 'idle' state.
+ # (Leftover debug print dropped: it wrote to stdout on every run and
+ # called .get() without a timeout.)
+ self.assertTrue(self.node_state('idle'))
+
+ def test_in_state_when_no_state_available_old(self):
+ # With crunch_worker_state=None and age=90000 the test expects the
+ # 'down' state. (Leftover debug print dropped: it wrote to stdout on
+ # every run and called .get() without a timeout.)
+ self.make_actor(arv_node=testutil.arvados_node_mock(
+ crunch_worker_state=None, age=90000))
+ self.assertTrue(self.node_state('down'))
def test_in_idle_state(self):
self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None))
def test_no_shutdown_booting(self):
self.make_actor()
self.shutdowns._set_state(True, 600)
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual (assertEquals is a deprecated alias), expected value
+ # first to match the sibling shutdown tests.
+ self.assertEqual((False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_shutdown_without_arvados_node(self):
self.make_actor(start_time=0)
self.shutdowns._set_state(True, 600)
- self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((True, "node state is ('unpaired', 'open', 'boot exceeded', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
- def test_no_shutdown_missing(self):
+ def test_shutdown_missing(self):
arv_node = testutil.arvados_node_mock(10, job_uuid=None,
crunch_worker_state="down",
last_ping_at='1970-01-01T01:02:03.04050607Z')
self.make_actor(10, arv_node)
self.shutdowns._set_state(True, 600)
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
- def test_no_shutdown_running_broken(self):
+ def test_shutdown_running_broken(self):
arv_node = testutil.arvados_node_mock(12, job_uuid=None,
crunch_worker_state="down")
self.make_actor(12, arv_node)
self.shutdowns._set_state(True, 600)
self.cloud_client.broken.return_value = True
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_shutdown_missing_broken(self):
arv_node = testutil.arvados_node_mock(11, job_uuid=None,
self.make_actor(11, arv_node)
self.shutdowns._set_state(True, 600)
self.cloud_client.broken.return_value = True
- self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual (assertEquals is a deprecated alias), expected value
+ # first to match the sibling shutdown tests.
+ self.assertEqual((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_no_shutdown_when_window_closed(self):
self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_no_shutdown_when_node_running_job(self):
self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
self.shutdowns._set_state(True, 600)
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
- def test_no_shutdown_when_node_state_unknown(self):
+ def test_shutdown_when_node_state_unknown(self):
self.make_actor(5, testutil.arvados_node_mock(
5, crunch_worker_state=None))
self.shutdowns._set_state(True, 600)
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((True, "node state is ('idle', 'open', 'boot wait', 'idle exceeded')"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_no_shutdown_when_node_state_stale(self):
self.make_actor(6, testutil.arvados_node_mock(6, age=90000))
self.shutdowns._set_state(True, 600)
- self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ # assertEqual: assertEquals is a deprecated alias.
+ self.assertEqual((False, "node state is stale"),
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_arvados_node_match(self):
self.make_actor(2)
arv_node = testutil.arvados_node_mock(
2, hostname='compute-two.zzzzz.arvadosapi.com')
+ self.cloud_client.node_id.return_value = '2'
pair_id = self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)
self.assertEqual(self.cloud_mock.id, pair_id)
self.stop_proxy(self.node_actor)