Merge branch '4293-node-manager-timed-bootstrap-wip'
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import time
6 import unittest
7
8 import arvados.errors as arverror
9 import httplib2
10 import mock
11 import pykka
12
13 import arvnodeman.computenode.dispatch as dispatch
14 from . import testutil
15
class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
    """Tests for dispatch.ComputeNodeSetupActor.

    Every test runs the actor against a mock timer, a mock Arvados API
    client, and a mock cloud client built by make_mocks().
    """

    def make_mocks(self, arvados_effect=None, cloud_effect=None):
        """Create the mock timer, Arvados API client, and cloud client.

        arvados_effect becomes the side_effect of both
        nodes().create().execute and nodes().update().execute on the
        API client mock.  When omitted, it is a one-element list
        holding a fresh testutil.arvados_node_mock().

        cloud_effect is not used by this method; the parameter is kept
        so callers that pass it keep working.
        """
        if arvados_effect is None:
            arvados_effect = [testutil.arvados_node_mock()]
        self.arvados_effect = arvados_effect
        self.timer = testutil.MockTimer()
        self.api_client = mock.MagicMock(name='api_client')
        self.api_client.nodes().create().execute.side_effect = arvados_effect
        self.api_client.nodes().update().execute.side_effect = arvados_effect
        self.cloud_client = mock.MagicMock(name='cloud_client')
        self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)

    def make_actor(self, arv_node=None):
        """Start a ComputeNodeSetupActor and keep its proxy in setup_actor.

        If the test has not called make_mocks() itself, the mocks are
        built here.  With an arv_node, the API mock answers with that
        record; without one, make_mocks() supplies its default record.
        """
        if not hasattr(self, 'timer'):
            if arv_node is None:
                # Passing [None] would make the API mock return None,
                # and the assertions on arvados_node would then only
                # compare None with None.  Use the default record.
                self.make_mocks()
            else:
                self.make_mocks(arvados_effect=[arv_node])
        self.setup_actor = dispatch.ComputeNodeSetupActor.start(
            self.timer, self.api_client, self.cloud_client,
            testutil.MockSize(1), arv_node).proxy()

    def test_creation_without_arvados_node(self):
        # No existing record is given, so the node record is expected
        # to come from nodes().create().
        self.make_actor()
        self.assertEqual(self.arvados_effect[-1],
                         self.setup_actor.arvados_node.get(self.TIMEOUT))
        self.assertTrue(self.api_client.nodes().create().execute.called)
        self.assertEqual(self.cloud_client.create_node(),
                         self.setup_actor.cloud_node.get(self.TIMEOUT))

    def test_creation_with_arvados_node(self):
        # An existing record is given, so nodes().update() is expected
        # to be called rather than create().
        self.make_actor(testutil.arvados_node_mock())
        self.assertEqual(self.arvados_effect[-1],
                         self.setup_actor.arvados_node.get(self.TIMEOUT))
        self.assertTrue(self.api_client.nodes().update().execute.called)
        self.assertEqual(self.cloud_client.create_node(),
                         self.setup_actor.cloud_node.get(self.TIMEOUT))

    def test_failed_calls_retried(self):
        # The first API call raises a 500 ApiError and the second one
        # returns a node record.  The test passes once cloud_node is
        # assigned on the actor.
        self.make_mocks([
                arverror.ApiError(httplib2.Response({'status': '500'}), ""),
                testutil.arvados_node_mock(),
                ])
        self.make_actor()
        self.wait_for_assignment(self.setup_actor, 'cloud_node')

    def test_stop_when_no_cloud_node(self):
        # Every API call raises a 500 ApiError, so the actor never
        # receives a node record from the API mock.
        self.make_mocks(
            arverror.ApiError(httplib2.Response({'status': '500'}), ""))
        self.make_actor()
        self.setup_actor.stop_if_no_cloud_node()
        self.assertTrue(
            self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))

    def test_no_stop_when_cloud_node(self):
        self.make_actor()
        self.wait_for_assignment(self.setup_actor, 'cloud_node')
        self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT)
        # NOTE(review): stop_proxy() presumably returns a true value
        # only when the actor was still running -- confirm in testutil.
        self.assertTrue(self.stop_proxy(self.setup_actor),
                        "actor was stopped by stop_if_no_cloud_node")

    def test_subscribe(self):
        # Subscribe while the API mock is still failing, then let the
        # next create() call succeed.
        self.make_mocks(
            arverror.ApiError(httplib2.Response({'status': '500'}), ""))
        self.make_actor()
        subscriber = mock.Mock(name='subscriber_mock')
        self.setup_actor.subscribe(subscriber)
        self.api_client.nodes().create().execute.side_effect = [
            testutil.arvados_node_mock()]
        self.wait_for_assignment(self.setup_actor, 'cloud_node')
        # Fail with a clear message, rather than a TypeError from
        # indexing call_args, if the subscriber was never notified.
        self.assertTrue(subscriber.called)
        self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                         subscriber.call_args[0][0].actor_ref.actor_urn)

    def test_late_subscribe(self):
        # Subscribe only after cloud_node has been assigned.
        self.make_actor()
        subscriber = mock.Mock(name='subscriber_mock')
        self.wait_for_assignment(self.setup_actor, 'cloud_node')
        self.setup_actor.subscribe(subscriber).get(self.TIMEOUT)
        self.stop_proxy(self.setup_actor)
        # Fail with a clear message, rather than a TypeError from
        # indexing call_args, if the subscriber was never notified.
        self.assertTrue(subscriber.called)
        self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                         subscriber.call_args[0][0].actor_ref.actor_urn)
94
95
class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
    """Fixtures and a shared test for shutdown actor test cases.

    Classes using this mixin must define ACTOR_CLASS, the shutdown
    actor class that make_actor() starts.
    """

    def make_mocks(self, cloud_node=None, arvados_node=None,
                   shutdown_open=True):
        """Create the mocks that make_actor() hands to the actors.

        cloud_node defaults to testutil.cloud_node_mock().
        shutdown_open is the first argument given to the mock shutdown
        timer's _set_state(); the second argument is always 300.
        """
        self.timer = testutil.MockTimer()
        shutdown_timer = testutil.MockShutdownTimer()
        shutdown_timer._set_state(shutdown_open, 300)
        self.shutdowns = shutdown_timer
        self.cloud_client = mock.MagicMock(name='cloud_client')
        self.updates = mock.MagicMock(name='update_mock')
        self.cloud_node = (testutil.cloud_node_mock() if cloud_node is None
                           else cloud_node)
        self.arvados_node = arvados_node

    def make_actor(self, cancellable=True):
        """Start a monitor actor and a shutdown actor that refers to it.

        Mocks are built with default arguments unless the test already
        called make_mocks().  Proxies for both actors are stored in
        shutdown_actor and monitor_actor.
        """
        if not hasattr(self, 'timer'):
            self.make_mocks()
        monitor_ref = dispatch.ComputeNodeMonitorActor.start(
            self.cloud_node, time.time(), self.shutdowns, self.timer,
            self.updates, self.arvados_node)
        shutdown_ref = self.ACTOR_CLASS.start(
            self.timer, self.cloud_client, monitor_ref, cancellable)
        self.shutdown_actor = shutdown_ref.proxy()
        self.monitor_actor = monitor_ref.proxy()

    def check_success_flag(self, expected, allow_msg_count=1):
        """Fail unless the shutdown actor's success flag becomes expected.

        The flag is read up to 1 + allow_msg_count times, where
        allow_msg_count is the number of internal messages that may
        need to be handled for shutdown to finish.  The comparison is
        by identity, so None, True and False are told apart.
        """
        attempts = 1 + allow_msg_count
        for _ in range(attempts):
            last_flag = self.shutdown_actor.success.get(self.TIMEOUT)
            if last_flag is expected:
                return
        self.fail("success flag {} is not {}".format(last_flag, expected))

    def test_uncancellable_shutdown(self, *mocks):
        # Start with the window closed and a cloud client whose
        # destroy_node() reports failure: the flag must stay None.
        self.make_mocks(shutdown_open=False)
        destroy_node = self.cloud_client.destroy_node
        destroy_node.return_value = False
        self.make_actor(cancellable=False)
        self.check_success_flag(None, 0)
        # Once the window opens and destroy_node() reports success,
        # the flag is expected to become True.
        self.shutdowns._set_state(True, 600)
        destroy_node.return_value = True
        self.check_success_flag(True)
137
138
class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                       unittest.TestCase):
    """Runs the shutdown tests against dispatch.ComputeNodeShutdownActor."""

    ACTOR_CLASS = dispatch.ComputeNodeShutdownActor

    def test_easy_shutdown(self):
        # Default mocks: shutdown window open, destroy_node() returns
        # a (truthy) MagicMock.
        self.make_actor()
        self.check_success_flag(True)
        destroy_node = self.cloud_client.destroy_node
        self.assertTrue(destroy_node.called)

    def test_shutdown_cancelled_when_window_closes(self):
        # With the window closed, a cancellable shutdown is expected to
        # report False without ever calling destroy_node().
        self.make_mocks(shutdown_open=False)
        self.make_actor()
        self.check_success_flag(False, 2)
        destroy_node = self.cloud_client.destroy_node
        self.assertFalse(destroy_node.called)

    def test_shutdown_retries_when_cloud_fails(self):
        self.make_mocks()
        destroy_node = self.cloud_client.destroy_node
        # While destroy_node() reports failure the flag stays None.
        destroy_node.return_value = False
        self.make_actor()
        first_flag = self.shutdown_actor.success.get(self.TIMEOUT)
        self.assertIsNone(first_flag)
        # After it starts reporting success the flag becomes True.
        destroy_node.return_value = True
        self.check_success_flag(True)

    def test_late_subscribe(self):
        self.make_actor()
        listener = mock.Mock(name='subscriber_mock')
        self.shutdown_actor.subscribe(listener).get(self.TIMEOUT)
        self.stop_proxy(self.shutdown_actor)
        self.assertTrue(listener.called)
        # The listener's first positional argument must refer to the
        # shutdown actor itself.
        expected_urn = self.shutdown_actor.actor_ref.actor_urn
        notified_urn = listener.call_args[0][0].actor_ref.actor_urn
        self.assertEqual(expected_urn, notified_urn)
170
171
class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
                                     unittest.TestCase):
    """Tests for dispatch.ComputeNodeUpdateActor."""

    def make_actor(self):
        """Start an update actor built from a mock driver factory."""
        self.driver = mock.MagicMock(name='driver_mock')
        actor_ref = dispatch.ComputeNodeUpdateActor.start(self.driver)
        self.updater = actor_ref.proxy()

    def test_node_sync(self):
        self.make_actor()
        cloud_record = testutil.cloud_node_mock()
        arvados_record = testutil.arvados_node_mock()
        sync_future = self.updater.sync_node(cloud_record, arvados_record)
        sync_future.get(self.TIMEOUT)
        # self.driver() is the mock's return value, i.e. the object
        # the actor gets when it calls the driver factory.
        self.driver().sync_node.assert_called_with(cloud_record,
                                                   arvados_record)
184
185
class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
                                      unittest.TestCase):
    """Tests for dispatch.ComputeNodeMonitorActor."""

    def make_mocks(self, node_num):
        """Create the fixtures a monitor actor is started with.

        The mock shutdown timer is set with _set_state(False, 300),
        and the cloud node mock is built for node_num.
        """
        shutdown_timer = testutil.MockShutdownTimer()
        shutdown_timer._set_state(False, 300)
        self.shutdowns = shutdown_timer
        self.timer = mock.MagicMock(name='timer_mock')
        self.updates = mock.MagicMock(name='update_mock')
        self.cloud_mock = testutil.cloud_node_mock(node_num)
        self.subscriber = mock.Mock(name='subscriber_mock')

    def make_actor(self, node_num=1, arv_node=None, start_time=None):
        """Start a monitor actor and subscribe self.subscriber to it.

        Mocks are built for node_num unless the test already called
        make_mocks().  start_time defaults to the current time.
        """
        if not hasattr(self, 'cloud_mock'):
            self.make_mocks(node_num)
        started_at = time.time() if start_time is None else start_time
        actor_ref = dispatch.ComputeNodeMonitorActor.start(
            self.cloud_mock, started_at, self.shutdowns, self.timer,
            self.updates, arv_node)
        self.node_actor = actor_ref.proxy()
        self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)

    def node_state(self, *states):
        """Return the actor's in_state() answer for the given states."""
        state_future = self.node_actor.in_state(*states)
        return state_future.get(self.TIMEOUT)

    def _shutdown_eligible(self):
        """Return the actor's shutdown_eligible() answer."""
        eligible_future = self.node_actor.shutdown_eligible()
        return eligible_future.get(self.TIMEOUT)

    def test_in_state_when_unpaired(self):
        # No Arvados node is given, so no state can be reported.
        self.make_actor()
        observed = self.node_state('idle', 'alloc')
        self.assertIsNone(observed)

    def test_in_state_when_pairing_stale(self):
        stale_node = testutil.arvados_node_mock(job_uuid=None, age=90000)
        self.make_actor(arv_node=stale_node)
        observed = self.node_state('idle', 'alloc')
        self.assertIsNone(observed)

    def test_in_state_when_no_state_available(self):
        stateless_node = testutil.arvados_node_mock(info={})
        self.make_actor(arv_node=stateless_node)
        observed = self.node_state('idle', 'alloc')
        self.assertIsNone(observed)

    def test_in_idle_state(self):
        idle_node = testutil.arvados_node_mock(job_uuid=None)
        self.make_actor(2, arv_node=idle_node)
        self.assertTrue(self.node_state('idle'))
        self.assertFalse(self.node_state('alloc'))
        self.assertTrue(self.node_state('idle', 'alloc'))

    def test_in_alloc_state(self):
        busy_node = testutil.arvados_node_mock(job_uuid=True)
        self.make_actor(3, arv_node=busy_node)
        self.assertFalse(self.node_state('idle'))
        self.assertTrue(self.node_state('alloc'))
        self.assertTrue(self.node_state('idle', 'alloc'))

    def test_init_shutdown_scheduling(self):
        # 300 is the value make_mocks() gave the shutdown timer.
        self.make_actor()
        schedule = self.timer.schedule
        self.assertTrue(schedule.called)
        self.assertEqual(300, schedule.call_args[0][0])

    def test_shutdown_window_close_scheduling(self):
        self.make_actor()
        self.shutdowns._set_state(False, 600)
        schedule = self.timer.schedule
        # Forget the call made at start-up so only the call made by
        # consider_shutdown() is checked.
        schedule.reset_mock()
        self.node_actor.consider_shutdown().get(self.TIMEOUT)
        self.stop_proxy(self.node_actor)
        self.assertTrue(schedule.called)
        self.assertEqual(600, schedule.call_args[0][0])
        self.assertFalse(self.subscriber.called)

    def test_shutdown_subscription(self):
        self.make_actor()
        self.shutdowns._set_state(True, 600)
        self.node_actor.consider_shutdown().get(self.TIMEOUT)
        self.assertTrue(self.subscriber.called)
        # The subscriber's first positional argument must refer to the
        # monitor actor itself.
        expected_urn = self.node_actor.actor_ref.actor_urn
        notified_urn = self.subscriber.call_args[0][0].actor_ref.actor_urn
        self.assertEqual(expected_urn, notified_urn)

    def test_shutdown_without_arvados_node(self):
        self.make_actor()
        self.shutdowns._set_state(True, 600)
        self.assertTrue(self._shutdown_eligible())

    def test_no_shutdown_without_arvados_node_and_old_cloud_node(self):
        # start_time=0 makes the cloud node look very old.
        self.make_actor(start_time=0)
        self.shutdowns._set_state(True, 600)
        self.assertFalse(self._shutdown_eligible())

    def test_no_shutdown_when_window_closed(self):
        # The shutdown timer is left in the closed state set by
        # make_mocks().
        idle_node = testutil.arvados_node_mock(3, job_uuid=None)
        self.make_actor(3, idle_node)
        self.assertFalse(self._shutdown_eligible())

    def test_no_shutdown_when_node_running_job(self):
        busy_node = testutil.arvados_node_mock(4, job_uuid=True)
        self.make_actor(4, busy_node)
        self.shutdowns._set_state(True, 600)
        self.assertFalse(self._shutdown_eligible())

    def test_no_shutdown_when_node_state_unknown(self):
        stateless_node = testutil.arvados_node_mock(5, info={})
        self.make_actor(5, stateless_node)
        self.shutdowns._set_state(True, 600)
        self.assertFalse(self._shutdown_eligible())

    def test_no_shutdown_when_node_state_stale(self):
        stale_node = testutil.arvados_node_mock(6, age=90000)
        self.make_actor(6, stale_node)
        self.shutdowns._set_state(True, 600)
        self.assertFalse(self._shutdown_eligible())

    def test_arvados_node_match(self):
        self.make_actor(2)
        candidate = testutil.arvados_node_mock(
            2, hostname='compute-two.zzzzz.arvadosapi.com')
        pair_future = self.node_actor.offer_arvados_pair(candidate)
        pair_id = pair_future.get(self.TIMEOUT)
        self.assertEqual(self.cloud_mock.id, pair_id)
        self.stop_proxy(self.node_actor)
        self.updates.sync_node.assert_called_with(self.cloud_mock, candidate)

    def test_arvados_node_mismatch(self):
        # Cloud node 3 is offered the Arvados record for node 1.
        self.make_actor(3)
        candidate = testutil.arvados_node_mock(1)
        pair_future = self.node_actor.offer_arvados_pair(candidate)
        self.assertIsNone(pair_future.get(self.TIMEOUT))

    def test_update_cloud_node(self):
        self.make_actor(1)
        # Build a fresh cloud record for node 2 but give it id '1';
        # presumably so the actor treats it as news about its own
        # node -- confirm against the actor.
        self.make_mocks(2)
        self.cloud_mock.id = '1'
        self.node_actor.update_cloud_node(self.cloud_mock)
        current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
        expected_ips = [testutil.ip_address_mock(2)]
        self.assertEqual(expected_ips, current_cloud.private_ips)

    def test_missing_cloud_node_update(self):
        # An update of None must leave the original cloud node in
        # place.
        self.make_actor(1)
        self.node_actor.update_cloud_node(None)
        current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
        expected_ips = [testutil.ip_address_mock(1)]
        self.assertEqual(expected_ips, current_cloud.private_ips)

    def test_update_arvados_node(self):
        self.make_actor(3)
        job_uuid = 'zzzzz-jjjjj-updatejobnode00'
        updated_record = testutil.arvados_node_mock(3, job_uuid)
        self.node_actor.update_arvados_node(updated_record)
        current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
        self.assertEqual(job_uuid, current_arvados['job_uuid'])

    def test_missing_arvados_node_update(self):
        # An update of None must leave the original Arvados record in
        # place.
        original_record = testutil.arvados_node_mock(4)
        self.make_actor(4, original_record)
        self.node_actor.update_arvados_node(None)
        current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
        expected_ip = testutil.ip_address_mock(4)
        self.assertEqual(expected_ip, current_arvados['ip_address'])