8206: Add test to support retry on create_driver.
author Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 25 Jan 2016 22:02:40 +0000 (17:02 -0500)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 25 Jan 2016 22:02:40 +0000 (17:02 -0500)
services/nodemanager/arvnodeman/computenode/__init__.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/launcher.py
services/nodemanager/tests/testutil.py

index 1bea4ebda85c20a7d423b4152f5a3c54593fed52..bc8ada532d3d92ad2aa18d8a8d674f05edd3763d 100644 (file)
@@ -45,35 +45,48 @@ def arvados_node_missing(arvados_node, fresh_time):
         return not timestamp_fresh(arvados_timestamp(arvados_node["last_ping_at"]), fresh_time)
 
 def _retry(errors=()):
-    """Retry decorator for an actor method that makes remote requests.
+    """Retry decorator for a method that makes remote requests.
+
+    Use this function to decorate a method, and pass in a tuple of exceptions
+    to catch.  If the original method raises a known cloud driver error, or any
+    of the given exception types, this decorator will retry the method with
+    exponential backoff, either by sleeping and looping (if self._timer is
+    None) or by scheduling a later call of the method (if self._timer is a
+    timer actor).
 
-    Use this function to decorator an actor method, and pass in a
-    tuple of exceptions to catch.  This decorator will schedule
-    retries of that method with exponential backoff if the
-    original method raises a known cloud driver error, or any of the
-    given exception types.
     """
+
     def decorator(orig_func):
         @functools.wraps(orig_func)
         def retry_wrapper(self, *args, **kwargs):
             start_time = time.time()
-            try:
-                return orig_func(self, *args, **kwargs)
-            except Exception as error:
-                if not (isinstance(error, errors) or
-                        self._cloud.is_cloud_exception(error)):
-                    raise
-                self._logger.warning(
-                    "Client error: %s - waiting %s seconds",
-                    error, self.retry_wait)
-                self._timer.schedule(start_time + self.retry_wait,
-                                     getattr(self._later,
-                                             orig_func.__name__),
-                                     *args, **kwargs)
-                self.retry_wait = min(self.retry_wait * 2,
-                                      self.max_retry_wait)
-            else:
-                self.retry_wait = self.min_retry_wait
+            while True:
+                try:
+                    ret = orig_func(self, *args, **kwargs)
+                except Exception as error:
+                    if not (isinstance(error, errors) or
+                            self._cloud.is_cloud_exception(error)):
+                        raise
+                    self._logger.warning(
+                        "Client error: %s - waiting %s seconds",
+                        error, self.retry_wait)
+                    if self._timer:
+                        # reschedule to be called again
+                        self._timer.schedule(start_time + self.retry_wait,
+                                             getattr(self._later,
+                                                     orig_func.__name__),
+                                             *args, **kwargs)
+                    else:
+                        # sleep on it.
+                        time.sleep(self.retry_wait)
+                    self.retry_wait = min(self.retry_wait * 2,
+                                          self.max_retry_wait)
+                    if self._timer:
+                        # expect to be called again by timer so don't loop
+                        return
+                else:
+                    self.retry_wait = self.min_retry_wait
+                    return ret
         return retry_wrapper
     return decorator
 
index f0d53142e1b0d5bae681f786bd2e5d55fb54b1db..779209bd645a6048832591fc995f8bb860353fe5 100644 (file)
@@ -2,6 +2,7 @@
 
 from __future__ import absolute_import, print_function
 
+import logging
 from operator import attrgetter
 
 import libcloud.common.types as cloud_types
@@ -45,6 +46,10 @@ class BaseComputeNodeDriver(object):
         """
         self.min_retry_wait = retry_wait
         self.max_retry_wait = max_retry_wait
+        self.retry_wait = retry_wait
+        self._cloud = type(self)
+        self._logger = logging.getLogger(str(self._cloud))
+        self._timer = None
         self.real = self._create_driver(driver_class, **auth_kwargs)
         self.list_kwargs = list_kwargs
         self.create_kwargs = create_kwargs
index e8c2fe661e5203469a1ee87341159aeb6cdc1aec..1ee792858f251b219ce070166c4d68fb2f101d38 100644 (file)
@@ -71,7 +71,7 @@ def launch_pollers(config, server_calculator):
 
     timer = TimedCallBackActor.start(poll_time / 10.0).proxy()
     cloud_node_poller = CloudNodeListMonitorActor.start(
-        config.new_cloud_client(), timer, poll_time, max_poll_time).proxy()
+        config.new_cloud_client(timer), timer, poll_time, max_poll_time).proxy()
     arvados_node_poller = ArvadosNodeListMonitorActor.start(
         config.new_arvados_client(), timer, poll_time, max_poll_time).proxy()
     job_queue_poller = JobQueueMonitorActor.start(
index e543c2891698c6f0626c3bf5f444cd1c0b49a86b..b266ed62da4c502636d62c2b320216df5b098250 100644 (file)
@@ -130,6 +130,13 @@ class DriverTestMixin(object):
     def driver_method_args(self, method_name):
         return getattr(self.driver_mock(), method_name).call_args
 
+    def test_driver_create_retry(self):
+        driver_mock2 = mock.MagicMock(name='driver_mock2')
+        self.driver_mock.side_effect = (Exception("oops"), driver_mock2)
+        kwargs = {'user_id': 'foo'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertIs(driver.real, driver_mock2)
 
 class RemotePollLoopActorTestMixin(ActorTestMixin):
     def build_monitor(self, *args, **kwargs):