Merge branch '9486-retry-instance-limit-exceeded' closes #9486
author Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 13 Jul 2016 16:51:21 +0000 (12:51 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 13 Jul 2016 16:51:21 +0000 (12:51 -0400)
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/tests/test_computenode_dispatch.py

index db799bc16b806beb09313fb56bbcee8e88f4a00a..c78f1c6b8d63160c40e7e57d9b29f57d07e59dcf 100644 (file)
@@ -211,8 +211,10 @@ class BaseComputeNodeDriver(RetryMixin):
         # libcloud compute drivers typically raise bare Exceptions to
         # represent API errors.  Return True for any exception that is
         # exactly an Exception, or a better-known higher-level exception.
-        if (exception is BaseHTTPError and
-            self.message and self.message.startswith("InvalidInstanceID.NotFound")):
+        if (type(exception) is BaseHTTPError and
+            exception.message and
+            (exception.message.startswith("InvalidInstanceID.NotFound") or
+             exception.message.startswith("InstanceLimitExceeded"))):
             return True
         return (isinstance(exception, cls.CLOUD_ERRORS) or
                 type(exception) is Exception)
index 227b5e5f3471ba4cf2e484461cd2c651f26a96e1..c3774c1b7afd8fa3c53f36ace1444daab0a22d81 100644 (file)
@@ -11,7 +11,10 @@ import mock
 import pykka
 import threading
 
+from libcloud.common.exceptions import BaseHTTPError
+
 import arvnodeman.computenode.dispatch as dispatch
+from arvnodeman.computenode.driver import BaseComputeNodeDriver
 from . import testutil
 
 class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
@@ -25,6 +28,7 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.api_client.nodes().update().execute.side_effect = arvados_effect
         self.cloud_client = mock.MagicMock(name='cloud_client')
         self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
+        self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception
 
     def make_actor(self, arv_node=None):
         if not hasattr(self, 'timer'):
@@ -86,6 +90,28 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
 
+    def test_unknown_basehttperror_not_retried(self):
+        self.make_mocks()
+        self.cloud_client.create_node.side_effect = [
+            BaseHTTPError(400, "Unknown"),
+            self.cloud_client.create_node.return_value,
+            ]
+        self.make_actor()
+        finished = threading.Event()
+        self.setup_actor.subscribe(lambda _: finished.set())
+        assert(finished.wait(self.TIMEOUT))
+        self.assertEqual(0, self.cloud_client.post_create_node.call_count)
+
+    def test_known_basehttperror_retried(self):
+        self.make_mocks()
+        self.cloud_client.create_node.side_effect = [
+            BaseHTTPError(400, "InstanceLimitExceeded"),
+            self.cloud_client.create_node.return_value,
+            ]
+        self.make_actor()
+        self.wait_for_assignment(self.setup_actor, 'cloud_node')
+        self.assertEqual(1, self.cloud_client.post_create_node.call_count)
+
     def test_failed_post_create_retried(self):
         self.make_mocks()
         self.cloud_client.post_create_node.side_effect = [