Brown bag typo bugfix. refs #7031
[arvados.git] / services / nodemanager / tests / test_computenode_dispatch.py
index 7f6988dbe9df21dce1797698156f5a6e6390e517..c22e7a0e0b8d16a0d55b782df27e1c0bfe3ecacb 100644 (file)
@@ -14,7 +14,7 @@ import arvnodeman.computenode.dispatch as dispatch
 from . import testutil
 
 class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
-    def make_mocks(self, arvados_effect=None, cloud_effect=None):
+    def make_mocks(self, arvados_effect=None):
         if arvados_effect is None:
             arvados_effect = [testutil.arvados_node_mock()]
         self.arvados_effect = arvados_effect
@@ -48,26 +48,47 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.assertEqual(self.cloud_client.create_node(),
                          self.setup_actor.cloud_node.get(self.TIMEOUT))
 
-    def test_failed_calls_retried(self):
+    def test_failed_arvados_calls_retried(self):
         self.make_mocks([
                 arverror.ApiError(httplib2.Response({'status': '500'}), ""),
                 testutil.arvados_node_mock(),
                 ])
         self.make_actor()
+        self.wait_for_assignment(self.setup_actor, 'arvados_node')
+
+    def test_failed_cloud_calls_retried(self):
+        self.make_mocks()
+        self.cloud_client.create_node.side_effect = [
+            Exception("test cloud creation error"),
+            self.cloud_client.create_node.return_value,
+            ]
+        self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
 
+    def test_failed_post_create_retried(self):
+        self.make_mocks()
+        self.cloud_client.post_create_node.side_effect = [
+            Exception("test cloud post-create error"), None]
+        self.make_actor()
+        done = self.FUTURE_CLASS()
+        self.setup_actor.subscribe(done.set)
+        done.get(self.TIMEOUT)
+        self.assertEqual(2, self.cloud_client.post_create_node.call_count)
+
     def test_stop_when_no_cloud_node(self):
         self.make_mocks(
             arverror.ApiError(httplib2.Response({'status': '500'}), ""))
         self.make_actor()
-        self.setup_actor.stop_if_no_cloud_node()
+        self.assertTrue(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(
             self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
 
     def test_no_stop_when_cloud_node(self):
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT)
+        self.assertFalse(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(self.stop_proxy(self.setup_actor),
                         "actor was stopped by stop_if_no_cloud_node")
 
@@ -100,20 +121,23 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.shutdowns = testutil.MockShutdownTimer()
         self.shutdowns._set_state(shutdown_open, 300)
         self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.arvados_client = mock.MagicMock(name='arvados_client')
         self.updates = mock.MagicMock(name='update_mock')
         if cloud_node is None:
             cloud_node = testutil.cloud_node_mock()
         self.cloud_node = cloud_node
         self.arvados_node = arvados_node
 
-    def make_actor(self):
+    def make_actor(self, cancellable=True):
         if not hasattr(self, 'timer'):
             self.make_mocks()
         monitor_actor = dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_node, time.time(), self.shutdowns, self.timer,
-            self.updates, self.arvados_node)
+            self.cloud_node, time.time(), self.shutdowns,
+            testutil.cloud_node_fqdn, self.timer, self.updates,
+            self.arvados_node)
         self.shutdown_actor = self.ACTOR_CLASS.start(
-            self.timer, self.cloud_client, monitor_actor).proxy()
+            self.timer, self.cloud_client, self.arvados_client, monitor_actor,
+            cancellable).proxy()
         self.monitor_actor = monitor_actor.proxy()
 
     def check_success_flag(self, expected, allow_msg_count=1):
@@ -126,6 +150,40 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         else:
             self.fail("success flag {} is not {}".format(last_flag, expected))
 
+    def test_uncancellable_shutdown(self, *mocks):
+        self.make_mocks(shutdown_open=False)
+        self.cloud_client.destroy_node.return_value = False
+        self.make_actor(cancellable=False)
+        self.check_success_flag(None, 0)
+        self.shutdowns._set_state(True, 600)
+        self.cloud_client.destroy_node.return_value = True
+        self.check_success_flag(True)
+
+    def test_arvados_node_cleaned_after_shutdown(self, *mocks):
+        cloud_node = testutil.cloud_node_mock(62)
+        arv_node = testutil.arvados_node_mock(62)
+        self.make_mocks(cloud_node, arv_node)
+        self.make_actor()
+        self.check_success_flag(True, 3)
+        update_mock = self.arvados_client.nodes().update
+        self.assertTrue(update_mock.called)
+        update_kwargs = update_mock.call_args_list[0][1]
+        self.assertEqual(arv_node['uuid'], update_kwargs.get('uuid'))
+        self.assertIn('body', update_kwargs)
+        for clear_key in ['slot_number', 'hostname', 'ip_address',
+                          'first_ping_at', 'last_ping_at']:
+            self.assertIn(clear_key, update_kwargs['body'])
+            self.assertIsNone(update_kwargs['body'][clear_key])
+        self.assertTrue(update_mock().execute.called)
+
+    def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
+        cloud_node = testutil.cloud_node_mock(61)
+        arv_node = testutil.arvados_node_mock(61)
+        self.make_mocks(cloud_node, arv_node, shutdown_open=False)
+        self.make_actor(cancellable=True)
+        self.check_success_flag(False, 2)
+        self.assertFalse(self.arvados_client.nodes().update.called)
+
 
 class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                        unittest.TestCase):
@@ -190,8 +248,9 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         if start_time is None:
             start_time = time.time()
         self.node_actor = dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_mock, start_time, self.shutdowns, self.timer,
-            self.updates, arv_node).proxy()
+            self.cloud_mock, start_time, self.shutdowns,
+            testutil.cloud_node_fqdn, self.timer, self.updates,
+            arv_node).proxy()
         self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
 
     def node_state(self, *states):
@@ -199,28 +258,29 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
 
     def test_in_state_when_unpaired(self):
         self.make_actor()
-        self.assertIsNone(self.node_state('idle', 'alloc'))
+        self.assertIsNone(self.node_state('idle', 'busy'))
 
     def test_in_state_when_pairing_stale(self):
         self.make_actor(arv_node=testutil.arvados_node_mock(
                 job_uuid=None, age=90000))
-        self.assertIsNone(self.node_state('idle', 'alloc'))
+        self.assertIsNone(self.node_state('idle', 'busy'))
 
     def test_in_state_when_no_state_available(self):
-        self.make_actor(arv_node=testutil.arvados_node_mock(info={}))
-        self.assertIsNone(self.node_state('idle', 'alloc'))
+        self.make_actor(arv_node=testutil.arvados_node_mock(
+                crunch_worker_state=None))
+        self.assertIsNone(self.node_state('idle', 'busy'))
 
     def test_in_idle_state(self):
         self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None))
         self.assertTrue(self.node_state('idle'))
-        self.assertFalse(self.node_state('alloc'))
-        self.assertTrue(self.node_state('idle', 'alloc'))
+        self.assertFalse(self.node_state('busy'))
+        self.assertTrue(self.node_state('idle', 'busy'))
 
-    def test_in_alloc_state(self):
+    def test_in_busy_state(self):
         self.make_actor(3, arv_node=testutil.arvados_node_mock(job_uuid=True))
         self.assertFalse(self.node_state('idle'))
-        self.assertTrue(self.node_state('alloc'))
-        self.assertTrue(self.node_state('idle', 'alloc'))
+        self.assertTrue(self.node_state('busy'))
+        self.assertTrue(self.node_state('idle', 'busy'))
 
     def test_init_shutdown_scheduling(self):
         self.make_actor()
@@ -265,7 +325,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_when_node_state_unknown(self):
-        self.make_actor(5, testutil.arvados_node_mock(5, info={}))
+        self.make_actor(5, testutil.arvados_node_mock(
+            5, crunch_worker_state=None))
         self.shutdowns._set_state(True, 600)
         self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
@@ -289,6 +350,13 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertIsNone(
             self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
 
+    def test_arvados_node_mismatch_first_ping_too_early(self):
+        self.make_actor(4)
+        arv_node = testutil.arvados_node_mock(
+            4, first_ping_at='1971-03-02T14:15:16.1717282Z')
+        self.assertIsNone(
+            self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
+
     def test_update_cloud_node(self):
         self.make_actor(1)
         self.make_mocks(2)
@@ -319,3 +387,20 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
         self.assertEqual(testutil.ip_address_mock(4),
                          current_arvados['ip_address'])
+
+    def test_update_arvados_node_syncs_when_fqdn_mismatch(self):
+        self.make_mocks(5)
+        self.cloud_mock.extra['testname'] = 'cloudfqdn.zzzzz.arvadosapi.com'
+        self.make_actor()
+        arv_node = testutil.arvados_node_mock(5)
+        self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
+        self.assertEqual(1, self.updates.sync_node.call_count)
+
+    def test_update_arvados_node_skips_sync_when_fqdn_match(self):
+        self.make_mocks(6)
+        arv_node = testutil.arvados_node_mock(6)
+        self.cloud_mock.extra['testname'] ='{n[hostname]}.{n[domain]}'.format(
+            n=arv_node)
+        self.make_actor()
+        self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
+        self.assertEqual(0, self.updates.sync_node.call_count)