3 from __future__ import absolute_import, print_function
8 import arvados.errors as arverror
14 import arvnodeman.computenode.dispatch as dispatch
15 from . import testutil
# Test case for dispatch.ComputeNodeSetupActor; the actor is driven through
# mocked Arvados API and cloud clients.
17 class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
def make_mocks(self, arvados_effect=None):
    """Build the timer, API client and cloud client mocks for a test.

    arvados_effect is the sequence of results (or exceptions) that the
    mocked nodes().create().execute and nodes().update().execute calls
    produce; it defaults to a single mocked Arvados node.  The sequence
    is also stored on self.arvados_effect.
    """
    effects = arvados_effect
    if effects is None:
        effects = [testutil.arvados_node_mock()]
    self.arvados_effect = effects
    self.timer = testutil.MockTimer()

    # Both API entry points are fed from the same effect sequence.
    api_client = mock.MagicMock(name='api_client')
    api_client.nodes().create().execute.side_effect = effects
    api_client.nodes().update().execute.side_effect = effects
    self.api_client = api_client

    cloud_client = mock.MagicMock(name='cloud_client')
    cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
    self.cloud_client = cloud_client
def make_actor(self, arv_node=None):
    """Start a ComputeNodeSetupActor and keep its proxy in self.setup_actor.

    When make_mocks() has not run yet (no self.timer), the mocks are
    created here, seeded with arv_node if one was given.
    """
    mocks_ready = hasattr(self, 'timer')
    if not mocks_ready:
        effect = [arv_node] if arv_node else None
        self.make_mocks(arvados_effect=effect)
    actor_ref = dispatch.ComputeNodeSetupActor.start(
        self.timer, self.api_client, self.cloud_client,
        testutil.MockSize(1), arv_node)
    self.setup_actor = actor_ref.proxy()
# Assert that at least one nodes().update call on the API client mock was
# made for the given uuid; when uuid is falsy, the uuid of the last entry in
# self.arvados_effect is used instead.
# NOTE(review): the `size` default is built once, when the method is defined,
# and is then shared by every call that omits it.
# NOTE(review): the lines that open the body= argument are absent from this
# excerpt; only its closing line (using size.price) is visible.
36 def assert_node_properties_updated(self, uuid=None,
37 size=testutil.MockSize(1)):
38 self.api_client.nodes().update.assert_any_call(
39 uuid=(uuid or self.arvados_effect[-1]['uuid']),
44 'price': size.price}}})
# Setup without a pre-existing Arvados node record: expects the actor's
# arvados_node to equal the last mocked API response, the subscriber to be
# notified, exactly one executed create and one executed update, the node
# properties update, and cloud_node to be what cloud_client.create_node gives.
# NOTE(review): the bare assert() is skipped under `python -O`;
# self.assertTrue would always be evaluated.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
46 def test_creation_without_arvados_node(self):
48 finished = threading.Event()
49 self.setup_actor.subscribe(lambda _: finished.set())
50 self.assertEqual(self.arvados_effect[-1],
51 self.setup_actor.arvados_node.get(self.TIMEOUT))
52 assert(finished.wait(self.TIMEOUT))
53 self.assertEqual(1, self.api_client.nodes().create().execute.call_count)
54 self.assertEqual(1, self.api_client.nodes().update().execute.call_count)
55 self.assert_node_properties_updated()
56 self.assertEqual(self.cloud_client.create_node(),
57 self.setup_actor.cloud_node.get(self.TIMEOUT))
def test_creation_with_arvados_node(self):
    # Setup is handed an existing Arvados node record.  The API mock is
    # primed with two responses, and the test expects exactly two executed
    # updates plus the usual node-properties update.
    self.make_mocks(arvados_effect=[testutil.arvados_node_mock()]*2)
    self.make_actor(testutil.arvados_node_mock())
    finished = threading.Event()
    self.setup_actor.subscribe(lambda _: finished.set())
    self.assertEqual(self.arvados_effect[-1],
                     self.setup_actor.arvados_node.get(self.TIMEOUT))
    # A unittest assertion instead of a bare assert: it is still checked
    # when Python runs with -O, and it reports a readable failure.
    self.assertTrue(finished.wait(self.TIMEOUT),
                    "setup actor did not notify its subscriber in time")
    self.assert_node_properties_updated()
    self.assertEqual(2, self.api_client.nodes().update().execute.call_count)
    self.assertEqual(self.cloud_client.create_node(),
                     self.setup_actor.cloud_node.get(self.TIMEOUT))
# Visible pieces: a response sequence that starts with an ApiError built
# from an HTTP 500 response and continues with a mocked Arvados node, then a
# wait for the setup actor's 'arvados_node' attribute to be assigned.
# NOTE(review): the statements that install this sequence and start the
# actor are absent from this excerpt.
72 def test_failed_arvados_calls_retried(self):
74 arverror.ApiError(httplib2.Response({'status': '500'}), ""),
75 testutil.arvados_node_mock(),
78 self.wait_for_assignment(self.setup_actor, 'arvados_node')
# Makes cloud_client.create_node raise an Exception first and then yield its
# configured return_value, and waits for the setup actor's 'cloud_node'
# attribute to be assigned.
# NOTE(review): some statements of this method are absent from this excerpt.
80 def test_failed_cloud_calls_retried(self):
82 self.cloud_client.create_node.side_effect = [
83 Exception("test cloud creation error"),
84 self.cloud_client.create_node.return_value,
87 self.wait_for_assignment(self.setup_actor, 'cloud_node')
# post_create_node is mocked to raise on its first call and return None on
# its second.  Once the subscriber future resolves, exactly two
# post_create_node calls are expected.
# NOTE(review): some statements of this method are absent from this excerpt.
89 def test_failed_post_create_retried(self):
91 self.cloud_client.post_create_node.side_effect = [
92 Exception("test cloud post-create error"), None]
94 done = self.FUTURE_CLASS()
95 self.setup_actor.subscribe(done.set)
96 done.get(self.TIMEOUT)
97 self.assertEqual(2, self.cloud_client.post_create_node.call_count)
# Visible pieces: an ApiError built from an HTTP 500 response, a call to the
# actor's stop_if_no_cloud_node(), and a wait on the actor's stopped event.
# NOTE(review): the enclosing calls and assertions are absent from this
# excerpt, so what is asserted about each value cannot be stated here.
99 def test_stop_when_no_cloud_node(self):
101 arverror.ApiError(httplib2.Response({'status': '500'}), ""))
104 self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
106 self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
# Waits until the setup actor has a cloud_node, calls
# stop_if_no_cloud_node(), and finally asserts that stop_proxy() returns a
# truthy value, failing with the message shown otherwise.
# NOTE(review): some statements of this method are absent from this excerpt.
108 def test_no_stop_when_cloud_node(self):
110 self.wait_for_assignment(self.setup_actor, 'cloud_node')
112 self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
113 self.assertTrue(self.stop_proxy(self.setup_actor),
114 "actor was stopped by stop_if_no_cloud_node")
# Registers a mock subscriber, then swaps the create/update side effects for
# a single mocked Arvados node and waits for 'cloud_node' to be assigned.
# The subscriber's most recent call must have received, as first positional
# argument, an object whose actor_urn matches the setup actor's.
# NOTE(review): some statements of this method are absent from this excerpt.
116 def test_subscribe(self):
118 arverror.ApiError(httplib2.Response({'status': '500'}), ""))
120 subscriber = mock.Mock(name='subscriber_mock')
121 self.setup_actor.subscribe(subscriber)
122 retry_resp = [testutil.arvados_node_mock()]
123 self.api_client.nodes().create().execute.side_effect = retry_resp
124 self.api_client.nodes().update().execute.side_effect = retry_resp
125 self.wait_for_assignment(self.setup_actor, 'cloud_node')
126 self.assertEqual(self.setup_actor.actor_ref.actor_urn,
127 subscriber.call_args[0][0].actor_ref.actor_urn)
# Subscribes only after 'cloud_node' has been assigned, stops the proxy, and
# checks that the subscriber was still called with an object whose actor_urn
# matches the setup actor's.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
129 def test_late_subscribe(self):
131 subscriber = mock.Mock(name='subscriber_mock')
132 self.wait_for_assignment(self.setup_actor, 'cloud_node')
133 self.setup_actor.subscribe(subscriber).get(self.TIMEOUT)
134 self.stop_proxy(self.setup_actor)
135 self.assertEqual(self.setup_actor.actor_ref.actor_urn,
136 subscriber.call_args[0][0].actor_ref.actor_urn)
# Shared fixtures and tests for shutdown actors.  The methods below start
# self.ACTOR_CLASS, which this mixin itself does not define.
139 class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
def make_mocks(self, cloud_node=None, arvados_node=None,
               shutdown_open=True, node_broken=False):
    """Create the timers, client mocks and node fixtures for a test.

    shutdown_open is passed to the mock shutdown timer together with the
    value 300; node_broken becomes the return value of
    cloud_client.broken.  A mocked cloud node is created when cloud_node
    is None; arvados_node is stored unchanged.
    """
    self.timer = testutil.MockTimer()

    shutdown_timer = testutil.MockShutdownTimer()
    shutdown_timer._set_state(shutdown_open, 300)
    self.shutdowns = shutdown_timer

    cloud_client = mock.MagicMock(name='cloud_client')
    cloud_client.broken.return_value = node_broken
    self.cloud_client = cloud_client

    self.arvados_client = mock.MagicMock(name='arvados_client')
    self.updates = mock.MagicMock(name='update_mock')

    self.cloud_node = (testutil.cloud_node_mock() if cloud_node is None
                       else cloud_node)
    self.arvados_node = arvados_node
# Starts a ComputeNodeMonitorActor for self.cloud_node, then an ACTOR_CLASS
# shutdown actor that is handed that monitor actor.  start_time defaults to
# the current time.  A proxy for the monitor is kept in self.monitor_actor.
# NOTE(review): several lines are absent from this excerpt: the body of the
# hasattr() check, the tail of both start() calls, and wherever
# `cancellable` is used.
154 def make_actor(self, cancellable=True, start_time=None):
155 if not hasattr(self, 'timer'):
157 if start_time is None:
158 start_time = time.time()
159 monitor_actor = dispatch.ComputeNodeMonitorActor.start(
160 self.cloud_node, start_time, self.shutdowns,
161 testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
163 self.shutdown_actor = self.ACTOR_CLASS.start(
164 self.timer, self.cloud_client, self.arvados_client, monitor_actor,
166 self.monitor_actor = monitor_actor.proxy()
# Reads the shutdown actor's `success` attribute up to 1 + allow_msg_count
# times, comparing each value to `expected` by identity, and fails the test
# with the last value seen.
# NOTE(review): the body of the `if` is absent from this excerpt; presumably
# it ends the check on a match -- confirm against the full file.
168 def check_success_flag(self, expected, allow_msg_count=1):
169 # allow_msg_count is the number of internal messages that may
170 # need to be handled for shutdown to finish.
171 for try_num in range(1 + allow_msg_count):
172 last_flag = self.shutdown_actor.success.get(self.TIMEOUT)
173 if last_flag is expected:
176 self.fail("success flag {} is not {}".format(last_flag, expected))
def test_uncancellable_shutdown(self, *mocks):
    # Start with the shutdown timer closed and destroy_node reporting
    # failure: an uncancellable shutdown must leave the success flag None.
    self.make_mocks(shutdown_open=False)
    destroy_node = self.cloud_client.destroy_node
    destroy_node.return_value = False
    self.make_actor(cancellable=False)
    self.check_success_flag(None, 0)

    # Once the timer is opened and destroy_node reports success, the
    # flag is expected to become True.
    self.shutdowns._set_state(True, 600)
    destroy_node.return_value = True
    self.check_success_flag(True)
# After a successful shutdown of a paired node, the first recorded
# nodes().update call on the Arvados client must target the Arvados node's
# uuid, carry a body in which each of the listed keys is present and None,
# and have been executed.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
187 def test_arvados_node_cleaned_after_shutdown(self, *mocks):
188 cloud_node = testutil.cloud_node_mock(62)
189 arv_node = testutil.arvados_node_mock(62)
190 self.make_mocks(cloud_node, arv_node)
192 self.check_success_flag(True, 3)
193 update_mock = self.arvados_client.nodes().update
194 self.assertTrue(update_mock.called)
195 update_kwargs = update_mock.call_args_list[0][1]
196 self.assertEqual(arv_node['uuid'], update_kwargs.get('uuid'))
197 self.assertIn('body', update_kwargs)
198 for clear_key in ['slot_number', 'hostname', 'ip_address',
199 'first_ping_at', 'last_ping_at']:
200 self.assertIn(clear_key, update_kwargs['body'])
201 self.assertIsNone(update_kwargs['body'][clear_key])
202 self.assertTrue(update_mock().execute.called)
def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
    # With the shutdown timer closed, a cancellable shutdown should end
    # with success False and no nodes().update call on the Arvados client.
    node_num = 61
    paired_cloud_node = testutil.cloud_node_mock(node_num)
    paired_arv_node = testutil.arvados_node_mock(node_num)
    self.make_mocks(paired_cloud_node, paired_arv_node,
                    shutdown_open=False)
    self.make_actor(cancellable=True)
    self.check_success_flag(False, 2)
    update_mock = self.arvados_client.nodes().update
    self.assertFalse(update_mock.called)
# Runs the shared shutdown-actor tests, plus the tests below, against
# dispatch.ComputeNodeShutdownActor.
# NOTE(review): the continuation of the class header (remaining base
# classes) is absent from this excerpt.
213 class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
215 ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
def test_easy_shutdown(self):
    # With default mocks and a start time of 0, the shutdown is expected
    # to succeed and to have called cloud_client.destroy_node.
    self.make_actor(start_time=0)
    self.check_success_flag(True)
    destroy_node = self.cloud_client.destroy_node
    self.assertTrue(destroy_node.called)
# With the shutdown timer closed, the success flag is expected to become
# False, destroy_node must not have been called, and the actor's
# cancel_reason must equal ACTOR_CLASS.WINDOW_CLOSED.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
222 def test_shutdown_cancelled_when_window_closes(self):
223 self.make_mocks(shutdown_open=False)
225 self.check_success_flag(False, 2)
226 self.assertFalse(self.cloud_client.destroy_node.called)
227 self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
228 self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
# While destroy_node returns False the success flag is expected to stay
# None; after destroy_node is switched to return True, the flag is expected
# to become True.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
230 def test_shutdown_retries_when_cloud_fails(self):
232 self.cloud_client.destroy_node.return_value = False
233 self.make_actor(start_time=0)
234 self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
235 self.cloud_client.destroy_node.return_value = True
236 self.check_success_flag(True)
def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
    # The cloud reports the node as broken and destroy_node fails: the
    # shutdown should end with success False after a single destroy
    # attempt, with NODE_BROKEN as the cancel reason.
    self.make_mocks(node_broken=True)
    destroy_node = self.cloud_client.destroy_node
    destroy_node.return_value = False
    self.make_actor(start_time=0)
    self.check_success_flag(False, 2)
    self.assertEqual(1, destroy_node.call_count)
    expected_reason = self.ACTOR_CLASS.NODE_BROKEN
    actual_reason = self.shutdown_actor.cancel_reason.get(self.TIMEOUT)
    self.assertEqual(expected_reason, actual_reason)
# Subscribes a mock to the shutdown actor, stops the proxy, and checks that
# the subscriber was called with an object whose actor_urn matches the
# shutdown actor's.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
247 def test_late_subscribe(self):
249 subscriber = mock.Mock(name='subscriber_mock')
250 self.shutdown_actor.subscribe(subscriber).get(self.TIMEOUT)
251 self.stop_proxy(self.shutdown_actor)
252 self.assertTrue(subscriber.called)
253 self.assertEqual(self.shutdown_actor.actor_ref.actor_urn,
254 subscriber.call_args[0][0].actor_ref.actor_urn)
# Test case for dispatch.ComputeNodeUpdateActor.
# NOTE(review): the continuation of the class header (remaining base
# classes) is absent from this excerpt.
257 class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
def make_actor(self):
    """Start a ComputeNodeUpdateActor around a mock driver.

    The mock is stored in self.driver and the actor's proxy in
    self.updater.
    """
    driver = mock.MagicMock(name='driver_mock')
    self.driver = driver
    actor_ref = dispatch.ComputeNodeUpdateActor.start(driver)
    self.updater = actor_ref.proxy()
# Calls sync_node on the updater with a mocked cloud/Arvados node pair and
# checks that the object returned by calling the driver mock had sync_node
# invoked with the same pair.
# NOTE(review): at least one statement of this method is absent from this
# excerpt.
263 def test_node_sync(self):
265 cloud_node = testutil.cloud_node_mock()
266 arv_node = testutil.arvados_node_mock()
267 self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
268 self.driver().sync_node.assert_called_with(cloud_node, arv_node)
# Test case for dispatch.ComputeNodeMonitorActor.
# NOTE(review): the continuation of the class header (remaining base
# classes) is absent from this excerpt.
271 class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
def make_mocks(self, node_num):
    """Create the mocks a monitor actor needs.

    The shutdown timer starts in state (False, 300), the cloud node mock
    is built for node_num, and cloud_client.broken returns False.
    """
    shutdown_timer = testutil.MockShutdownTimer()
    shutdown_timer._set_state(False, 300)
    self.shutdowns = shutdown_timer

    self.timer = mock.MagicMock(name='timer_mock')
    self.updates = mock.MagicMock(name='update_mock')
    self.cloud_mock = testutil.cloud_node_mock(node_num)
    self.subscriber = mock.Mock(name='subscriber_mock')

    cloud_client = mock.MagicMock(name='cloud_client')
    cloud_client.broken.return_value = False
    self.cloud_client = cloud_client
def make_actor(self, node_num=1, arv_node=None, start_time=None):
    """Start a ComputeNodeMonitorActor and subscribe self.subscriber to it.

    Mocks are created for node_num unless self.cloud_mock already exists.
    start_time defaults to the current time.  The actor's proxy is kept
    in self.node_actor.
    """
    mocks_ready = hasattr(self, 'cloud_mock')
    if not mocks_ready:
        self.make_mocks(node_num)
    started_at = time.time() if start_time is None else start_time
    actor_ref = dispatch.ComputeNodeMonitorActor.start(
        self.cloud_mock, started_at, self.shutdowns,
        testutil.cloud_node_fqdn, self.timer, self.updates,
        self.cloud_client, arv_node, boot_fail_after=300)
    self.node_actor = actor_ref.proxy()
    # Wait for the subscription to be processed before returning.
    subscription = self.node_actor.subscribe(self.subscriber)
    subscription.get(self.TIMEOUT)
def node_state(self, *states):
    """Forward *states to the actor's in_state() and return the result.

    The result is awaited for at most self.TIMEOUT.
    """
    pending = self.node_actor.in_state(*states)
    return pending.get(self.TIMEOUT)
# Expects in_state('idle', 'busy') to report None.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
297 def test_in_state_when_unpaired(self):
299 self.assertIsNone(self.node_state('idle', 'busy'))
def test_in_state_when_pairing_stale(self):
    # The paired Arvados record is created with age=90000; the state query
    # is then expected to report None.
    stale_node = testutil.arvados_node_mock(job_uuid=None, age=90000)
    self.make_actor(arv_node=stale_node)
    state = self.node_state('idle', 'busy')
    self.assertIsNone(state)
def test_in_state_when_no_state_available(self):
    # The paired Arvados record carries no crunch_worker_state; the state
    # query is then expected to report None.
    stateless_node = testutil.arvados_node_mock(crunch_worker_state=None)
    self.make_actor(arv_node=stateless_node)
    state = self.node_state('idle', 'busy')
    self.assertIsNone(state)
def test_in_idle_state(self):
    # A paired node without a job: 'idle' matches, 'busy' does not, and a
    # query naming both states matches.
    jobless_node = testutil.arvados_node_mock(job_uuid=None)
    self.make_actor(2, arv_node=jobless_node)
    self.assertTrue(self.node_state('idle'))
    self.assertFalse(self.node_state('busy'))
    self.assertTrue(self.node_state('idle', 'busy'))
def test_in_busy_state(self):
    # A paired node with a truthy job_uuid: 'busy' matches, 'idle' does
    # not, and a query naming both states matches.
    working_node = testutil.arvados_node_mock(job_uuid=True)
    self.make_actor(3, arv_node=working_node)
    self.assertFalse(self.node_state('idle'))
    self.assertTrue(self.node_state('busy'))
    self.assertTrue(self.node_state('idle', 'busy'))
# Expects timer.schedule to have been called, with 300 as the first
# positional argument of its most recent call.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
323 def test_init_shutdown_scheduling(self):
325 self.assertTrue(self.timer.schedule.called)
326 self.assertEqual(300, self.timer.schedule.call_args[0][0])
# Sets the shutdown timer to (False, 600), clears earlier schedule calls,
# and asks the actor to consider shutdown.  A new schedule call with 600 as
# first positional argument is expected, and the subscriber must not have
# been called.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
328 def test_shutdown_window_close_scheduling(self):
330 self.shutdowns._set_state(False, 600)
331 self.timer.schedule.reset_mock()
332 self.node_actor.consider_shutdown().get(self.TIMEOUT)
333 self.stop_proxy(self.node_actor)
334 self.assertTrue(self.timer.schedule.called)
335 self.assertEqual(600, self.timer.schedule.call_args[0][0])
336 self.assertFalse(self.subscriber.called)
def test_shutdown_subscription(self):
    # With a start time of 0 and the shutdown timer set to (True, 600),
    # consider_shutdown() should notify the subscriber, passing an object
    # whose actor_urn matches the monitor actor's.
    self.make_actor(start_time=0)
    self.shutdowns._set_state(True, 600)
    self.node_actor.consider_shutdown().get(self.TIMEOUT)
    self.assertTrue(self.subscriber.called)
    own_urn = self.node_actor.actor_ref.actor_urn
    positional_args = self.subscriber.call_args[0]
    self.assertEqual(own_urn, positional_args[0].actor_ref.actor_urn)
# With the shutdown timer set to (True, 600), shutdown_eligible() is
# expected to return a string starting with "node is still booting".
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
346 def test_no_shutdown_booting(self):
348 self.shutdowns._set_state(True, 600)
349 self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is still booting"))
def test_shutdown_without_arvados_node(self):
    # An actor with no paired Arvados node, started at time 0, is
    # expected to be eligible once the shutdown timer is (True, 600).
    self.make_actor(start_time=0)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    # The "no_shutdown" tests in this class show that a refusal comes back
    # as a non-empty explanatory string, which is truthy, so assertTrue
    # would also accept a refusal.  Compare by identity instead.
    # NOTE(review): assumes an eligible node yields exactly True --
    # confirm against ComputeNodeMonitorActor.shutdown_eligible.
    self.assertIs(True, eligible)
def test_no_shutdown_missing(self):
    # Paired node reported "down" with a last ping dated 1970: with the
    # shutdown timer set to (True, 600) the answer should be the
    # "node is not idle." explanation.
    down_node = testutil.arvados_node_mock(
        10,
        job_uuid=None,
        crunch_worker_state="down",
        last_ping_at='1970-01-01T01:02:03.04050607Z')
    self.make_actor(10, down_node)
    self.shutdowns._set_state(True, 600)
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("node is not idle."))
def test_no_shutdown_running_broken(self):
    # Paired node reported "down" (default ping data) while the cloud
    # client says it is broken: the answer should still be the
    # "node is not idle." explanation.
    down_node = testutil.arvados_node_mock(
        12, job_uuid=None, crunch_worker_state="down")
    self.make_actor(12, down_node)
    self.shutdowns._set_state(True, 600)
    self.cloud_client.broken.return_value = True
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("node is not idle."))
def test_shutdown_missing_broken(self):
    # Paired node reported "down" with a last ping dated 1970, and the
    # cloud client says it is broken: it is expected to be eligible once
    # the shutdown timer is (True, 600).
    arv_node = testutil.arvados_node_mock(
        11, job_uuid=None,
        crunch_worker_state="down",
        last_ping_at='1970-01-01T01:02:03.04050607Z')
    self.make_actor(11, arv_node)
    self.shutdowns._set_state(True, 600)
    self.cloud_client.broken.return_value = True
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    # The "no_shutdown" tests in this class show that a refusal comes back
    # as a non-empty explanatory string, which is truthy, so assertTrue
    # would also accept a refusal.  Compare by identity instead.
    # NOTE(review): assumes an eligible node yields exactly True --
    # confirm against ComputeNodeMonitorActor.shutdown_eligible.
    self.assertIs(True, eligible)
def test_no_shutdown_when_window_closed(self):
    # The shutdown timer is never opened in this test, so the answer
    # should be the "shutdown window is not open." explanation.
    jobless_node = testutil.arvados_node_mock(3, job_uuid=None)
    self.make_actor(3, jobless_node)
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("shutdown window is not open."))
def test_no_shutdown_when_node_running_job(self):
    # Paired node with a truthy job_uuid: even with the shutdown timer at
    # (True, 600) the answer should be the "node is not idle." explanation.
    working_node = testutil.arvados_node_mock(4, job_uuid=True)
    self.make_actor(4, working_node)
    self.shutdowns._set_state(True, 600)
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("node is not idle."))
def test_no_shutdown_when_node_state_unknown(self):
    # Paired node without a crunch_worker_state: the answer should be the
    # "node is not idle." explanation.
    stateless_node = testutil.arvados_node_mock(
        5, crunch_worker_state=None)
    self.make_actor(5, stateless_node)
    self.shutdowns._set_state(True, 600)
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("node is not idle."))
def test_no_shutdown_when_node_state_stale(self):
    # Paired node created with age=90000: the answer should be the
    # "node is not idle." explanation.
    stale_node = testutil.arvados_node_mock(6, age=90000)
    self.make_actor(6, stale_node)
    self.shutdowns._set_state(True, 600)
    reason = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(reason.startswith("node is not idle."))
# Offers the actor an Arvados node numbered 2 while cloud_client.node_id
# returns '2'.  The offer must answer with the cloud mock's id, and
# updates.sync_node must have been called with the cloud mock and that
# Arvados node.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
401 def test_arvados_node_match(self):
403 arv_node = testutil.arvados_node_mock(
404 2, hostname='compute-two.zzzzz.arvadosapi.com')
405 self.cloud_client.node_id.return_value = '2'
406 pair_id = self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)
407 self.assertEqual(self.cloud_mock.id, pair_id)
408 self.stop_proxy(self.node_actor)
409 self.updates.sync_node.assert_called_with(self.cloud_mock, arv_node)
# Offers the actor an Arvados node numbered 1 via offer_arvados_pair().
# NOTE(review): the actor setup and the assertion wrapped around the offer
# are absent from this excerpt, so the expected result is not visible.
411 def test_arvados_node_mismatch(self):
413 arv_node = testutil.arvados_node_mock(1)
415 self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
# Offers the actor an Arvados node numbered 4 whose first_ping_at is dated
# 1971, via offer_arvados_pair().
# NOTE(review): the actor setup and the assertion wrapped around the offer
# are absent from this excerpt, so the expected result is not visible.
417 def test_arvados_node_mismatch_first_ping_too_early(self):
419 arv_node = testutil.arvados_node_mock(
420 4, first_ping_at='1971-03-02T14:15:16.1717282Z')
422 self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
# Sets the cloud mock's id to '1', passes it to update_cloud_node(), and
# expects the actor's cloud_node to report the private IPs mocked for
# node 2.
# NOTE(review): the statements that create the actor and the replacement
# cloud mock are absent from this excerpt.
424 def test_update_cloud_node(self):
427 self.cloud_mock.id = '1'
428 self.node_actor.update_cloud_node(self.cloud_mock)
429 current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
430 self.assertEqual([testutil.ip_address_mock(2)],
431 current_cloud.private_ips)
# Passes None to update_cloud_node() and expects the actor's cloud_node to
# still report the private IPs mocked for node 1.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
433 def test_missing_cloud_node_update(self):
435 self.node_actor.update_cloud_node(None)
436 current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
437 self.assertEqual([testutil.ip_address_mock(1)],
438 current_cloud.private_ips)
# Passes a new Arvados node record carrying job_uuid to
# update_arvados_node() and expects the actor's arvados_node to report that
# job_uuid.
# NOTE(review): the statement that creates the actor is absent from this
# excerpt.
440 def test_update_arvados_node(self):
442 job_uuid = 'zzzzz-jjjjj-updatejobnode00'
443 new_arvados = testutil.arvados_node_mock(3, job_uuid)
444 self.node_actor.update_arvados_node(new_arvados)
445 current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
446 self.assertEqual(job_uuid, current_arvados['job_uuid'])
def test_missing_arvados_node_update(self):
    # Passing None to update_arvados_node() must leave the paired record
    # in place: its ip_address should still be the one mocked for node 4.
    paired_node = testutil.arvados_node_mock(4)
    self.make_actor(4, paired_node)
    self.node_actor.update_arvados_node(None)
    kept_node = self.node_actor.arvados_node.get(self.TIMEOUT)
    expected_ip = testutil.ip_address_mock(4)
    self.assertEqual(expected_ip, kept_node['ip_address'])
# Gives the cloud mock the 'testname' extra shown below, then passes an
# Arvados node numbered 5 to update_arvados_node().  Exactly one
# updates.sync_node call is expected.
# NOTE(review): some statements of this method, including the actor setup,
# are absent from this excerpt.
455 def test_update_arvados_node_syncs_when_fqdn_mismatch(self):
457 self.cloud_mock.extra['testname'] = 'cloudfqdn.zzzzz.arvadosapi.com'
459 arv_node = testutil.arvados_node_mock(5)
460 self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
461 self.assertEqual(1, self.updates.sync_node.call_count)
# Sets the cloud mock's 'testname' extra from a "{hostname}.{domain}"
# format string, then passes an Arvados node numbered 6 to
# update_arvados_node().  No updates.sync_node call is expected.
# NOTE(review): the arguments to format() and the actor setup are absent
# from this excerpt.
463 def test_update_arvados_node_skips_sync_when_fqdn_match(self):
465 arv_node = testutil.arvados_node_mock(6)
466 self.cloud_mock.extra['testname'] ='{n[hostname]}.{n[domain]}'.format(
469 self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
470 self.assertEqual(0, self.updates.sync_node.call_count)