3 from __future__ import absolute_import, print_function
8 import arvados.errors as arverror
13 import arvnodeman.computenode.dispatch as dispatch
14 from . import testutil
16 class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
def make_mocks(self, arvados_effect=None):
    """Install the timer, API-client and cloud-client doubles on the test.

    arvados_effect becomes the side_effect of both the mocked
    nodes().create().execute and nodes().update().execute calls.  When it
    is None, a one-element list holding a default Arvados node mock is used.
    """
    effect = arvados_effect
    if effect is None:
        effect = [testutil.arvados_node_mock()]
    self.arvados_effect = effect
    self.timer = testutil.MockTimer()
    api_client = mock.MagicMock(name='api_client')
    self.api_client = api_client
    api_client.nodes().create().execute.side_effect = effect
    api_client.nodes().update().execute.side_effect = effect
    cloud_client = mock.MagicMock(name='cloud_client')
    self.cloud_client = cloud_client
    cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
def make_actor(self, arv_node=None):
    """Start a ComputeNodeSetupActor and keep its proxy in self.setup_actor.

    Mocks are created on demand (detected by the absence of self.timer);
    in that case a truthy arv_node becomes the only mocked API response.
    """
    mocks_ready = hasattr(self, 'timer')
    if not mocks_ready:
        effect = [arv_node] if arv_node else None
        self.make_mocks(arvados_effect=effect)
    actor_ref = dispatch.ComputeNodeSetupActor.start(
        self.timer, self.api_client, self.cloud_client,
        testutil.MockSize(1), arv_node)
    self.setup_actor = actor_ref.proxy()
# Assert (via assert_any_call) that the mocked Arvados nodes().update was
# called with the expected uuid and a payload whose last entry is the
# size's price.  uuid falls back to the 'uuid' of the last entry in
# self.arvados_effect when it is not given (or is falsy).
# NOTE(review): the lines between the `uuid=` argument and the final
# `'price'` entry are missing from this copy -- the closing `}}})` has no
# matching openers here.  Restore them from the upstream file before running.
35 def assert_node_properties_updated(self, uuid=None,
36 size=testutil.MockSize(1)):
37 self.api_client.nodes().update.assert_any_call(
38 uuid=(uuid or self.arvados_effect[-1]['uuid']),
43 'price': size.price}}})
def test_creation_without_arvados_node(self):
    """Expect one create, one update, and a cloud node when none is given."""
    # The actor has to be started before anything is asserted; with no
    # earlier make_mocks() call this also installs the default mocks that
    # define self.arvados_effect, self.api_client and self.cloud_client.
    self.make_actor()
    self.assertEqual(self.arvados_effect[-1],
                     self.setup_actor.arvados_node.get(self.TIMEOUT))
    self.assertEqual(1, self.api_client.nodes().create().execute.call_count)
    self.assertEqual(1, self.api_client.nodes().update().execute.call_count)
    self.assert_node_properties_updated()
    self.assertEqual(self.cloud_client.create_node(),
                     self.setup_actor.cloud_node.get(self.TIMEOUT))
def test_creation_with_arvados_node(self):
    """Expect two update calls and a cloud node when a node is supplied."""
    responses = [testutil.arvados_node_mock()] * 2
    self.make_mocks(arvados_effect=responses)
    self.make_actor(testutil.arvados_node_mock())
    expected_record = self.arvados_effect[-1]
    actual_record = self.setup_actor.arvados_node.get(self.TIMEOUT)
    self.assertEqual(expected_record, actual_record)
    self.assert_node_properties_updated()
    update_execute = self.api_client.nodes().update().execute
    self.assertEqual(2, update_execute.call_count)
    expected_cloud = self.cloud_client.create_node()
    actual_cloud = self.setup_actor.cloud_node.get(self.TIMEOUT)
    self.assertEqual(expected_cloud, actual_cloud)
def test_failed_arvados_calls_retried(self):
    """Expect arvados_node to be assigned after a first API call fails."""
    # NOTE(review): the statement wrapping these two responses was missing
    # from this copy.  make_mocks() is the only helper in this class that
    # takes an effect sequence, so they are passed to it here -- confirm
    # against the upstream file.
    self.make_mocks([
        arverror.ApiError(httplib2.Response({'status': '500'}), ""),
        testutil.arvados_node_mock(),
    ])
    # Start the actor so there is something to wait on.
    self.make_actor()
    self.wait_for_assignment(self.setup_actor, 'arvados_node')
def test_failed_cloud_calls_retried(self):
    """Expect cloud_node to be assigned after create_node raises once."""
    # self.cloud_client only exists after make_mocks(); it must be called
    # explicitly so the side_effect below is set before the actor starts.
    self.make_mocks()
    self.cloud_client.create_node.side_effect = [
        Exception("test cloud creation error"),
        self.cloud_client.create_node.return_value,
    ]
    self.make_actor()
    self.wait_for_assignment(self.setup_actor, 'cloud_node')
def test_failed_post_create_retried(self):
    """Expect post_create_node to be called twice when the first call raises."""
    # Mocks must exist before the side_effect is configured, and the actor
    # must be started before anything can subscribe to it.
    self.make_mocks()
    self.cloud_client.post_create_node.side_effect = [
        Exception("test cloud post-create error"), None]
    self.make_actor()
    done = self.FUTURE_CLASS()
    # The future is set by the actor's subscriber callback; waiting on it
    # blocks until the actor has notified its subscribers.
    self.setup_actor.subscribe(done.set)
    done.get(self.TIMEOUT)
    self.assertEqual(2, self.cloud_client.post_create_node.call_count)
# Exercises stop_if_no_cloud_node() on a setup actor and then waits on the
# actor's actor_stopped event.
# NOTE(review): this test is truncated in this copy.  Each visible line ends
# with a closing parenthesis whose opening call is missing, so the setup call
# that receives the ApiError and the assertions wrapping the last two
# expressions must be restored from the upstream file before running.
92 def test_stop_when_no_cloud_node(self):
# An HTTP 500 ApiError, presumably the mocked Arvados response -- confirm.
94 arverror.ApiError(httplib2.Response({'status': '500'}), ""))
97 self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
99 self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
# Waits for the setup actor to have a cloud_node, calls
# stop_if_no_cloud_node(), then asserts that stop_proxy() returns a truthy
# value.
# NOTE(review): this test is truncated in this copy.  Nothing visible starts
# the actor, and the stop_if_no_cloud_node() line ends with a closing
# parenthesis whose opening assertion call is missing; restore both from the
# upstream file before running.
101 def test_no_stop_when_cloud_node(self):
103 self.wait_for_assignment(self.setup_actor, 'cloud_node')
105 self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
# The failure message is shown when stop_proxy() returns a falsy value.
106 self.assertTrue(self.stop_proxy(self.setup_actor),
107 "actor was stopped by stop_if_no_cloud_node")
def test_subscribe(self):
    """Expect a subscriber added before completion to receive the actor."""
    # NOTE(review): the call receiving this ApiError was missing from this
    # copy; make_mocks() is the only helper here that accepts an Arvados
    # side effect, so it is used -- confirm against the upstream file.
    self.make_mocks(
        arverror.ApiError(httplib2.Response({'status': '500'}), ""))
    self.make_actor()
    subscriber = mock.Mock(name='subscriber_mock')
    self.setup_actor.subscribe(subscriber)
    # Swap the failing side effect for a successful response so that the
    # actor can make progress.
    retry_resp = [testutil.arvados_node_mock()]
    self.api_client.nodes().create().execute.side_effect = retry_resp
    self.api_client.nodes().update().execute.side_effect = retry_resp
    self.wait_for_assignment(self.setup_actor, 'cloud_node')
    # The subscriber's first positional argument must refer to this actor.
    self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                     subscriber.call_args[0][0].actor_ref.actor_urn)
def test_late_subscribe(self):
    """Expect a subscriber added after cloud_node is set to get the actor."""
    # Start the actor first; nothing else defines self.setup_actor.
    self.make_actor()
    subscriber = mock.Mock(name='subscriber_mock')
    self.wait_for_assignment(self.setup_actor, 'cloud_node')
    self.setup_actor.subscribe(subscriber).get(self.TIMEOUT)
    self.stop_proxy(self.setup_actor)
    # The subscriber's first positional argument must refer to this actor.
    self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                     subscriber.call_args[0][0].actor_ref.actor_urn)
132 class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
def make_mocks(self, cloud_node=None, arvados_node=None,
               shutdown_open=True, node_broken=False):
    """Install the doubles used by the shutdown-actor tests.

    shutdown_open is passed to the mock shutdown timer's _set_state()
    together with the value 300.  node_broken is what the mocked
    cloud_client.broken() returns.  A default cloud node mock is created
    when cloud_node is None; arvados_node is stored unchanged.
    """
    self.timer = testutil.MockTimer()
    shutdown_timer = testutil.MockShutdownTimer()
    self.shutdowns = shutdown_timer
    shutdown_timer._set_state(shutdown_open, 300)
    cloud_client = mock.MagicMock(name='cloud_client')
    self.cloud_client = cloud_client
    cloud_client.broken.return_value = node_broken
    self.arvados_client = mock.MagicMock(name='arvados_client')
    self.updates = mock.MagicMock(name='update_mock')
    self.cloud_node = (
        testutil.cloud_node_mock() if cloud_node is None else cloud_node)
    self.arvados_node = arvados_node
def make_actor(self, cancellable=True, start_time=None):
    """Start a monitor actor plus the shutdown actor under test.

    Mocks are created on demand (detected by the absence of self.timer)
    and start_time defaults to the current time.  The shutdown actor's
    proxy is stored in self.shutdown_actor and the monitor actor's proxy
    in self.monitor_actor.
    """
    if not hasattr(self, 'timer'):
        self.make_mocks()
    if start_time is None:
        start_time = time.time()
    # NOTE(review): the final argument of each start() call was missing
    # from this copy.  self.arvados_node and cancellable are the only
    # values prepared for this method that were otherwise unused, and the
    # tests read self.shutdown_actor through proxy-style .get() calls --
    # confirm both against the upstream file.
    monitor_actor = dispatch.ComputeNodeMonitorActor.start(
        self.cloud_node, start_time, self.shutdowns,
        testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
        self.arvados_node)
    self.shutdown_actor = self.ACTOR_CLASS.start(
        self.timer, self.cloud_client, self.arvados_client, monitor_actor,
        cancellable).proxy()
    self.monitor_actor = monitor_actor.proxy()
def check_success_flag(self, expected, allow_msg_count=1):
    """Fail unless the shutdown actor's success flag becomes *expected*.

    The flag is read up to 1 + allow_msg_count times and compared by
    identity; the check returns as soon as one read matches.
    """
    # allow_msg_count is the number of internal messages that may
    # need to be handled for shutdown to finish.
    for _ in range(1 + allow_msg_count):
        last_flag = self.shutdown_actor.success.get(self.TIMEOUT)
        if last_flag is expected:
            # A match ends the check; only exhausting every attempt fails.
            return
    self.fail("success flag {} is not {}".format(last_flag, expected))
def test_uncancellable_shutdown(self, *mocks):
    """Expect success to stay None at first, then become True.

    The actor starts non-cancellable with the window closed and
    destroy_node returning False; both are then flipped.
    """
    self.make_mocks(shutdown_open=False)
    destroy_node = self.cloud_client.destroy_node
    destroy_node.return_value = False
    self.make_actor(cancellable=False)
    # No extra internal messages are allowed for this first check.
    self.check_success_flag(None, 0)
    self.shutdowns._set_state(True, 600)
    destroy_node.return_value = True
    self.check_success_flag(True)
def test_arvados_node_cleaned_after_shutdown(self, *mocks):
    """Expect the Arvados node record to be blanked after a shutdown.

    The first nodes().update call must target the node's uuid and carry a
    body in which each listed field is None.
    """
    cloud_node = testutil.cloud_node_mock(62)
    arv_node = testutil.arvados_node_mock(62)
    self.make_mocks(cloud_node, arv_node)
    # Start the actors; nothing else defines self.shutdown_actor, which
    # check_success_flag() reads.
    self.make_actor()
    self.check_success_flag(True, 3)
    update_mock = self.arvados_client.nodes().update
    self.assertTrue(update_mock.called)
    # call_args_list[0][1] is the keyword-argument dict of the first call.
    update_kwargs = update_mock.call_args_list[0][1]
    self.assertEqual(arv_node['uuid'], update_kwargs.get('uuid'))
    self.assertIn('body', update_kwargs)
    for clear_key in ['slot_number', 'hostname', 'ip_address',
                      'first_ping_at', 'last_ping_at']:
        self.assertIn(clear_key, update_kwargs['body'])
        self.assertIsNone(update_kwargs['body'][clear_key])
    self.assertTrue(update_mock().execute.called)
def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
    """Expect no Arvados update when the success flag ends up False."""
    node_num = 61
    self.make_mocks(testutil.cloud_node_mock(node_num),
                    testutil.arvados_node_mock(node_num),
                    shutdown_open=False)
    self.make_actor(cancellable=True)
    self.check_success_flag(False, 2)
    update_mock = self.arvados_client.nodes().update
    self.assertFalse(update_mock.called)
206 class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
208 ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
def test_easy_shutdown(self):
    """Expect a True success flag and a destroy_node call with defaults."""
    self.make_actor(start_time=0)
    self.check_success_flag(True)
    destroy_node = self.cloud_client.destroy_node
    self.assertTrue(destroy_node.called)
def test_shutdown_cancelled_when_window_closes(self):
    """Expect cancellation with WINDOW_CLOSED when the window is closed."""
    self.make_mocks(shutdown_open=False)
    # Start the actors; nothing else defines self.shutdown_actor.
    self.make_actor()
    self.check_success_flag(False, 2)
    self.assertFalse(self.cloud_client.destroy_node.called)
    self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
                     self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
def test_shutdown_retries_when_cloud_fails(self):
    """Expect success to be None while destroy_node fails, then True."""
    # self.cloud_client only exists after make_mocks(); create it here so
    # destroy_node can be configured before the actor starts.
    self.make_mocks()
    self.cloud_client.destroy_node.return_value = False
    self.make_actor(start_time=0)
    self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
    self.cloud_client.destroy_node.return_value = True
    self.check_success_flag(True)
def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
    """Expect one destroy attempt and a NODE_BROKEN cancel reason."""
    self.make_mocks(node_broken=True)
    destroy_node = self.cloud_client.destroy_node
    destroy_node.return_value = False
    self.make_actor(start_time=0)
    self.check_success_flag(False, 2)
    self.assertEqual(1, destroy_node.call_count)
    reason = self.shutdown_actor.cancel_reason.get(self.TIMEOUT)
    self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN, reason)
def test_late_subscribe(self):
    """Expect a subscriber added to a started actor to receive that actor."""
    # Start the actors; nothing else defines self.shutdown_actor.
    self.make_actor()
    subscriber = mock.Mock(name='subscriber_mock')
    self.shutdown_actor.subscribe(subscriber).get(self.TIMEOUT)
    self.stop_proxy(self.shutdown_actor)
    self.assertTrue(subscriber.called)
    # The subscriber's first positional argument must refer to this actor.
    self.assertEqual(self.shutdown_actor.actor_ref.actor_urn,
                     subscriber.call_args[0][0].actor_ref.actor_urn)
250 class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
def make_actor(self):
    """Start a ComputeNodeUpdateActor around a mock driver.

    The driver mock is kept in self.driver and the actor's proxy in
    self.updater.
    """
    driver = mock.MagicMock(name='driver_mock')
    self.driver = driver
    actor_ref = dispatch.ComputeNodeUpdateActor.start(driver)
    self.updater = actor_ref.proxy()
def test_node_sync(self):
    """Expect sync_node to be forwarded to the driver with both nodes."""
    # Start the actor; nothing else defines self.updater or self.driver.
    self.make_actor()
    cloud_node = testutil.cloud_node_mock()
    arv_node = testutil.arvados_node_mock()
    self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
    # self.driver() is the return value of calling the driver mock.
    self.driver().sync_node.assert_called_with(cloud_node, arv_node)
264 class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
def make_mocks(self, node_num):
    """Install the doubles used by the monitor-actor tests.

    The mock shutdown timer is put in state (False, 300), the cloud node
    mock is built for node_num, and cloud_client.broken() returns False.
    """
    shutdown_timer = testutil.MockShutdownTimer()
    self.shutdowns = shutdown_timer
    shutdown_timer._set_state(False, 300)
    self.timer = mock.MagicMock(name='timer_mock')
    self.updates = mock.MagicMock(name='update_mock')
    self.cloud_mock = testutil.cloud_node_mock(node_num)
    self.subscriber = mock.Mock(name='subscriber_mock')
    cloud_client = mock.MagicMock(name='cloud_client')
    self.cloud_client = cloud_client
    cloud_client.broken.return_value = False
def make_actor(self, node_num=1, arv_node=None, start_time=None):
    """Start a ComputeNodeMonitorActor and subscribe self.subscriber to it.

    Mocks are created on demand (detected by the absence of
    self.cloud_mock), so node_num only matters when make_mocks() has not
    run yet.  start_time defaults to the current time.  The proxy is kept
    in self.node_actor.
    """
    if not hasattr(self, 'cloud_mock'):
        self.make_mocks(node_num)
    started_at = time.time() if start_time is None else start_time
    actor_ref = dispatch.ComputeNodeMonitorActor.start(
        self.cloud_mock, started_at, self.shutdowns,
        testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
        arv_node, boot_fail_after=300)
    self.node_actor = actor_ref.proxy()
    # Block until the subscription has been processed by the actor.
    self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
def node_state(self, *states):
    """Return the monitor actor's in_state(*states) answer.

    Waits up to self.TIMEOUT for the reply.
    """
    reply = self.node_actor.in_state(*states)
    return reply.get(self.TIMEOUT)
def test_in_state_when_unpaired(self):
    """Expect in_state to answer None when no Arvados node is paired."""
    # Start the actor with its defaults (arv_node=None); nothing else
    # defines self.node_actor, which node_state() reads.
    self.make_actor()
    self.assertIsNone(self.node_state('idle', 'busy'))
def test_in_state_when_pairing_stale(self):
    """Expect in_state to answer None for a node mock built with age=90000."""
    stale_node = testutil.arvados_node_mock(job_uuid=None, age=90000)
    self.make_actor(arv_node=stale_node)
    answer = self.node_state('idle', 'busy')
    self.assertIsNone(answer)
def test_in_state_when_no_state_available(self):
    """Expect in_state to answer None when crunch_worker_state is None."""
    stateless_node = testutil.arvados_node_mock(crunch_worker_state=None)
    self.make_actor(arv_node=stateless_node)
    answer = self.node_state('idle', 'busy')
    self.assertIsNone(answer)
def test_in_idle_state(self):
    """Expect 'idle' (and not 'busy') for a node mock with job_uuid=None."""
    jobless_node = testutil.arvados_node_mock(job_uuid=None)
    self.make_actor(2, arv_node=jobless_node)
    is_idle = self.node_state('idle')
    self.assertTrue(is_idle)
    is_busy = self.node_state('busy')
    self.assertFalse(is_busy)
    is_either = self.node_state('idle', 'busy')
    self.assertTrue(is_either)
def test_in_busy_state(self):
    """Expect 'busy' (and not 'idle') for a node mock with job_uuid=True."""
    working_node = testutil.arvados_node_mock(job_uuid=True)
    self.make_actor(3, arv_node=working_node)
    is_idle = self.node_state('idle')
    self.assertFalse(is_idle)
    is_busy = self.node_state('busy')
    self.assertTrue(is_busy)
    is_either = self.node_state('idle', 'busy')
    self.assertTrue(is_either)
def test_init_shutdown_scheduling(self):
    """Expect the new actor to schedule a timer call for time 300."""
    # Start the actor; it is what triggers the schedule call, and nothing
    # else defines self.timer.  300 is the value make_mocks() gives the
    # shutdown timer.
    self.make_actor()
    self.assertTrue(self.timer.schedule.called)
    self.assertEqual(300, self.timer.schedule.call_args[0][0])
def test_shutdown_window_close_scheduling(self):
    """Expect consider_shutdown to reschedule for 600 without notifying."""
    # Start the actor; nothing else defines self.shutdowns, self.timer or
    # self.node_actor.
    self.make_actor()
    self.shutdowns._set_state(False, 600)
    # Forget the schedule call made at start-up so only the one caused by
    # consider_shutdown() is inspected.
    self.timer.schedule.reset_mock()
    self.node_actor.consider_shutdown().get(self.TIMEOUT)
    self.stop_proxy(self.node_actor)
    self.assertTrue(self.timer.schedule.called)
    self.assertEqual(600, self.timer.schedule.call_args[0][0])
    self.assertFalse(self.subscriber.called)
def test_shutdown_subscription(self):
    """Expect consider_shutdown to call the subscriber with this actor."""
    self.make_actor(start_time=0)
    self.shutdowns._set_state(True, 600)
    self.node_actor.consider_shutdown().get(self.TIMEOUT)
    self.assertTrue(self.subscriber.called)
    expected_urn = self.node_actor.actor_ref.actor_urn
    # The first positional argument of the last subscriber call.
    notified = self.subscriber.call_args[0][0]
    self.assertEqual(expected_urn, notified.actor_ref.actor_urn)
def test_no_shutdown_booting(self):
    """Expect a just-started node not to be shutdown-eligible."""
    # Start the actor with the default start_time (now); nothing else
    # defines self.shutdowns or self.node_actor.
    self.make_actor()
    self.shutdowns._set_state(True, 600)
    self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_shutdown_without_arvados_node(self):
    """Expect eligibility with start_time=0, an open window and no pairing."""
    self.make_actor(start_time=0)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(eligible)
def test_no_shutdown_missing(self):
    """Expect no eligibility for a 'down' node with a 1970 last ping."""
    node_num = 10
    down_node = testutil.arvados_node_mock(
        node_num, job_uuid=None, crunch_worker_state="down",
        last_ping_at='1970-01-01T01:02:03.04050607Z')
    self.make_actor(node_num, down_node)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_no_shutdown_running_broken(self):
    """Expect no eligibility for a 'down' node the cloud reports broken."""
    node_num = 12
    down_node = testutil.arvados_node_mock(
        node_num, job_uuid=None, crunch_worker_state="down")
    self.make_actor(node_num, down_node)
    self.shutdowns._set_state(True, 600)
    self.cloud_client.broken.return_value = True
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_shutdown_missing_broken(self):
    """Expect eligibility for a broken 'down' node with a 1970 last ping."""
    node_num = 11
    down_node = testutil.arvados_node_mock(
        node_num, job_uuid=None, crunch_worker_state="down",
        last_ping_at='1970-01-01T01:02:03.04050607Z')
    self.make_actor(node_num, down_node)
    self.shutdowns._set_state(True, 600)
    self.cloud_client.broken.return_value = True
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertTrue(eligible)
def test_no_shutdown_when_window_closed(self):
    """Expect no eligibility while the shutdown timer is left as mocked.

    make_mocks() puts the timer in state (False, 300) and this test never
    changes it.
    """
    jobless_node = testutil.arvados_node_mock(3, job_uuid=None)
    self.make_actor(3, jobless_node)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_no_shutdown_when_node_running_job(self):
    """Expect no eligibility for a node mock built with job_uuid=True."""
    working_node = testutil.arvados_node_mock(4, job_uuid=True)
    self.make_actor(4, working_node)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_no_shutdown_when_node_state_unknown(self):
    """Expect no eligibility when crunch_worker_state is None."""
    stateless_node = testutil.arvados_node_mock(5, crunch_worker_state=None)
    self.make_actor(5, stateless_node)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_no_shutdown_when_node_state_stale(self):
    """Expect no eligibility for a node mock built with age=90000."""
    stale_node = testutil.arvados_node_mock(6, age=90000)
    self.make_actor(6, stale_node)
    self.shutdowns._set_state(True, 600)
    eligible = self.node_actor.shutdown_eligible().get(self.TIMEOUT)
    self.assertFalse(eligible)
def test_arvados_node_match(self):
    """Expect offer_arvados_pair to return the cloud node id and sync."""
    # NOTE(review): the actor setup was missing from this copy.  Node
    # number 2 is chosen to match the Arvados node mock below, following
    # the make_actor(N, arvados_node_mock(N)) pairing used by the other
    # tests in this class -- confirm against the upstream file.
    self.make_actor(2)
    arv_node = testutil.arvados_node_mock(
        2, hostname='compute-two.zzzzz.arvadosapi.com')
    pair_id = self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)
    self.assertEqual(self.cloud_mock.id, pair_id)
    self.stop_proxy(self.node_actor)
    self.updates.sync_node.assert_called_with(self.cloud_mock, arv_node)
# Offers the Arvados node mock numbered 1 to the monitor actor through
# offer_arvados_pair() and waits for the reply.
# NOTE(review): this test is truncated in this copy.  Nothing visible starts
# the actor, and the last line ends with a closing parenthesis whose opening
# assertion call is missing; restore both from the upstream file before
# running.
403 def test_arvados_node_mismatch(self):
405 arv_node = testutil.arvados_node_mock(1)
407 self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
# Offers an Arvados node mock numbered 4, with a first_ping_at timestamp in
# 1971, to the monitor actor through offer_arvados_pair() and waits for the
# reply.
# NOTE(review): this test is truncated in this copy.  Nothing visible starts
# the actor, and the last line ends with a closing parenthesis whose opening
# assertion call is missing; restore both from the upstream file before
# running.
409 def test_arvados_node_mismatch_first_ping_too_early(self):
411 arv_node = testutil.arvados_node_mock(
412 4, first_ping_at='1971-03-02T14:15:16.1717282Z')
414 self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
# Sends self.cloud_mock (with its id forced to '1') to update_cloud_node()
# and expects the actor's cloud_node to then report the private IP of mock
# address 2.
# NOTE(review): the setup lines between the `def` and the id assignment are
# missing from this copy; nothing visible creates self.cloud_mock or starts
# self.node_actor.  Restore them from the upstream file before running.
416 def test_update_cloud_node(self):
419 self.cloud_mock.id = '1'
420 self.node_actor.update_cloud_node(self.cloud_mock)
421 current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
422 self.assertEqual([testutil.ip_address_mock(2)],
423 current_cloud.private_ips)
def test_missing_cloud_node_update(self):
    """Expect update_cloud_node(None) to leave the cloud node unchanged."""
    # Start the actor with the default node number (1), which is the
    # address asserted below; nothing else defines self.node_actor.
    self.make_actor()
    self.node_actor.update_cloud_node(None)
    current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
    self.assertEqual([testutil.ip_address_mock(1)],
                     current_cloud.private_ips)
def test_update_arvados_node(self):
    """Expect update_arvados_node to replace the stored Arvados record."""
    # NOTE(review): the actor setup was missing from this copy.  Node
    # number 3 is chosen to match the Arvados node mock below, following
    # the make_actor(N, arvados_node_mock(N)) pairing used by the other
    # tests in this class -- confirm against the upstream file.
    self.make_actor(3)
    job_uuid = 'zzzzz-jjjjj-updatejobnode00'
    new_arvados = testutil.arvados_node_mock(3, job_uuid)
    self.node_actor.update_arvados_node(new_arvados)
    current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
    self.assertEqual(job_uuid, current_arvados['job_uuid'])
def test_missing_arvados_node_update(self):
    """Expect update_arvados_node(None) to keep the paired record's address."""
    node_num = 4
    self.make_actor(node_num, testutil.arvados_node_mock(node_num))
    self.node_actor.update_arvados_node(None)
    current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
    expected_ip = testutil.ip_address_mock(node_num)
    self.assertEqual(expected_ip, current_arvados['ip_address'])
def test_update_arvados_node_syncs_when_fqdn_mismatch(self):
    """Expect one sync_node call when the cloud node has a different FQDN."""
    # NOTE(review): both setup calls were missing from this copy.
    # self.cloud_mock must exist before its 'testname' entry is set, and
    # the actor must start afterwards.  Node number 5 mirrors the Arvados
    # node mock below -- confirm against the upstream file.
    self.make_mocks(5)
    self.cloud_mock.extra['testname'] = 'cloudfqdn.zzzzz.arvadosapi.com'
    self.make_actor()
    arv_node = testutil.arvados_node_mock(5)
    self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
    self.assertEqual(1, self.updates.sync_node.call_count)
def test_update_arvados_node_skips_sync_when_fqdn_match(self):
    """Expect no sync_node call when the cloud FQDN matches the node."""
    # NOTE(review): the setup calls and the format() argument were missing
    # from this copy.  The template indexes n['hostname'] and n['domain'],
    # and arv_node is the only record in scope.  Node number 6 mirrors the
    # Arvados node mock -- confirm against the upstream file.
    self.make_mocks(6)
    arv_node = testutil.arvados_node_mock(6)
    self.cloud_mock.extra['testname'] = '{n[hostname]}.{n[domain]}'.format(
        n=arv_node)
    self.make_actor()
    self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
    self.assertEqual(0, self.updates.sync_node.call_count)