3147: Add RetryLoop to the Python SDK.
authorBrett Smith <brett@curoverse.com>
Tue, 26 Aug 2014 13:46:17 +0000 (09:46 -0400)
committerBrett Smith <brett@curoverse.com>
Tue, 26 Aug 2014 15:32:00 +0000 (11:32 -0400)
This provides a general-purpose mechanism for us to retry all kinds of
operations.

sdk/python/arvados/retry.py [new file with mode: 0644]
sdk/python/setup.py
sdk/python/tests/test_retry.py [new file with mode: 0644]

diff --git a/sdk/python/arvados/retry.py b/sdk/python/arvados/retry.py
new file mode 100644 (file)
index 0000000..a5a303f
--- /dev/null
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+import time
+
+from collections import deque
+
+import arvados.errors
+
+class RetryLoop(object):
+    """Coordinate limited retries of code.
+
+    RetryLoop coordinates a loop that runs until it records a
+    successful result or tries too many times, whichever comes first.
+    Typical use looks like:
+
+        loop = RetryLoop(num_retries=2)
+        for tries_left in loop:
+            try:
+                result = do_something()
+            except TemporaryError as error:
+                log("error: {} ({} tries left)".format(error, tries_left))
+            else:
+                loop.save_result(result)
+        if loop.success():
+            return loop.last_result()
+    """
+    def __init__(self, num_retries, success_check=lambda r: True,
+                 backoff_start=0, backoff_growth=2, save_results=1):
+        """Construct a new RetryLoop.
+
+        Arguments:
+        * num_retries: The maximum number of times to retry the loop if it
+          doesn't succeed.  This means the loop could run at most 1+N times.
+        * success_check: This is a function that will be called each
+          time the loop saves a result.  The function should return
+          True if the result indicates loop success, False if it
+          represents a permanent failure state, and None if the loop
+          should continue.  If no function is provided, the loop will
+          end as soon as it records any result.
+        * backoff_start: The number of seconds that must pass before the
+          loop's second iteration.  Default 0, which disables all waiting.
+        * backoff_growth: The wait time multiplier after each iteration.
+          Default 2 (i.e., double the wait time each time).
+        * save_results: Specify a number to save the last N results
+          that the loop recorded.  These records are available through
+          the results attribute, oldest first.  Default 1.
+        """
+        self.tries_left = num_retries + 1
+        self.check_result = success_check
+        self.backoff_wait = backoff_start
+        self.backoff_growth = backoff_growth
+        self.next_start_time = 0
+        self.results = deque(maxlen=save_results)
+        self._running = None
+        self._success = None
+
+    def __iter__(self):
+        return self
+
+    def running(self):
+        return self._running and (self._success is None)
+
+    def next(self):
+        if self._running is None:
+            self._running = True
+        if (self.tries_left < 1) or not self.running():
+            self._running = False
+            raise StopIteration
+        else:
+            wait_time = max(0, self.next_start_time - time.time())
+            time.sleep(wait_time)
+            self.backoff_wait *= self.backoff_growth
+        self.next_start_time = time.time() + self.backoff_wait
+        self.tries_left -= 1
+        return self.tries_left
+
+    def save_result(self, result):
+        """Record a loop result.
+
+        Save the given result, and end the loop if it indicates
+        success or permanent failure.  See __init__'s documentation
+        about success_check to learn how to make that indication.
+        """
+        if not self.running():
+            raise arvados.errors.AssertionError(
+                "recorded a loop result after the loop finished")
+        self.results.append(result)
+        self._success = self.check_result(result)
+
+    def success(self):
+        """Return the loop's end state.
+
+        Returns True if the loop obtained a successful result, False if it
+        encountered permanent failure, or else None.
+        """
+        return self._success
+
+    def last_result(self):
+        """Return the most recent result the loop recorded."""
+        try:
+            return self.results[-1]
+        except IndexError:
+            raise arvados.errors.AssertionError(
+                "queried loop results before any were recorded")
index 2c51e8f1d0f6a5d56c06c73016ccf7ef1418f770..30cc7796495a2cdb2f7138a122acf114d2b13788 100644 (file)
@@ -32,5 +32,5 @@ setup(name='arvados-python-client',
         'ws4py'
         ],
       test_suite='tests',
-      tests_require=['PyYAML'],
+      tests_require=['mock', 'PyYAML'],
       zip_safe=False)
diff --git a/sdk/python/tests/test_retry.py b/sdk/python/tests/test_retry.py
new file mode 100644 (file)
index 0000000..131872b
--- /dev/null
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+
+import itertools
+import unittest
+
+import arvados.errors as arv_error
+import arvados.retry as arv_retry
+import mock
+
+from arvados_testutil import fake_httplib2_response
+
+class RetryLoopTestMixin(object):
+    @staticmethod
+    def loop_success(result):
+        # During the tests, we use integers that look like HTTP status
+        # codes as loop results.  Then we define simplified HTTP
+        # heuristics here to decide whether the result is success (True),
+        # permanent failure (False), or temporary failure (None).
+        if result < 400:
+            return True
+        elif result < 500:
+            return False
+        else:
+            return None
+
+    def run_loop(self, num_retries, *results, **kwargs):
+        responses = itertools.chain(results, itertools.repeat(None))
+        retrier = arv_retry.RetryLoop(num_retries, self.loop_success,
+                                      **kwargs)
+        for tries_left, response in itertools.izip(retrier, responses):
+            retrier.save_result(response)
+        return retrier
+
+    def check_result(self, retrier, expect_success, last_code):
+        self.assertIs(retrier.success(), expect_success,
+                      "loop success flag is incorrect")
+        self.assertEqual(last_code, retrier.last_result())
+
+
+class RetryLoopTestCase(unittest.TestCase, RetryLoopTestMixin):
+    def test_zero_retries_and_success(self):
+        retrier = self.run_loop(0, 200)
+        self.check_result(retrier, True, 200)
+
+    def test_zero_retries_and_tempfail(self):
+        retrier = self.run_loop(0, 500, 501)
+        self.check_result(retrier, None, 500)
+
+    def test_zero_retries_and_permfail(self):
+        retrier = self.run_loop(0, 400, 201)
+        self.check_result(retrier, False, 400)
+
+    def test_one_retry_with_immediate_success(self):
+        retrier = self.run_loop(1, 200, 201)
+        self.check_result(retrier, True, 200)
+
+    def test_one_retry_with_delayed_success(self):
+        retrier = self.run_loop(1, 500, 201)
+        self.check_result(retrier, True, 201)
+
+    def test_one_retry_with_no_success(self):
+        retrier = self.run_loop(1, 500, 501, 502)
+        self.check_result(retrier, None, 501)
+
+    def test_one_retry_but_permfail(self):
+        retrier = self.run_loop(1, 400, 201)
+        self.check_result(retrier, False, 400)
+
+    def test_two_retries_with_immediate_success(self):
+        retrier = self.run_loop(2, 200, 201, 202)
+        self.check_result(retrier, True, 200)
+
+    def test_two_retries_with_success_after_one(self):
+        retrier = self.run_loop(2, 500, 201, 502)
+        self.check_result(retrier, True, 201)
+
+    def test_two_retries_with_success_after_two(self):
+        retrier = self.run_loop(2, 500, 501, 202, 503)
+        self.check_result(retrier, True, 202)
+
+    def test_two_retries_with_no_success(self):
+        retrier = self.run_loop(2, 500, 501, 502, 503)
+        self.check_result(retrier, None, 502)
+
+    def test_two_retries_with_permfail(self):
+        retrier = self.run_loop(2, 500, 401, 202)
+        self.check_result(retrier, False, 401)
+
+    def test_save_result_before_start_is_error(self):
+        retrier = arv_retry.RetryLoop(0)
+        self.assertRaises(arv_error.AssertionError, retrier.save_result, 1)
+
+    def test_save_result_after_end_is_error(self):
+        retrier = arv_retry.RetryLoop(0)
+        for count in retrier:
+            pass
+        self.assertRaises(arv_error.AssertionError, retrier.save_result, 1)
+
+
+@mock.patch('time.time', side_effect=itertools.count())
+@mock.patch('time.sleep')
+class RetryLoopBackoffTestCase(unittest.TestCase, RetryLoopTestMixin):
+    def run_loop(self, num_retries, *results, **kwargs):
+        kwargs.setdefault('backoff_start', 8)
+        return super(RetryLoopBackoffTestCase, self).run_loop(
+            num_retries, *results, **kwargs)
+
+    def check_backoff(self, sleep_mock, sleep_count, multiplier=1):
+        # Figure out how much time we actually spent sleeping.
+        sleep_times = [arglist[0][0] for arglist in sleep_mock.call_args_list
+                       if arglist[0][0] > 0]
+        self.assertEqual(sleep_count, len(sleep_times),
+                         "loop did not back off correctly")
+        last_wait = 0
+        for this_wait in sleep_times:
+            self.assertGreater(this_wait, last_wait * multiplier,
+                               "loop did not grow backoff times correctly")
+            last_wait = this_wait
+
+    def test_no_backoff_with_no_retries(self, sleep_mock, time_mock):
+        self.run_loop(0, 500, 201)
+        self.check_backoff(sleep_mock, 0)
+
+    def test_no_backoff_after_success(self, sleep_mock, time_mock):
+        self.run_loop(1, 200, 501)
+        self.check_backoff(sleep_mock, 0)
+
+    def test_no_backoff_after_permfail(self, sleep_mock, time_mock):
+        self.run_loop(1, 400, 201)
+        self.check_backoff(sleep_mock, 0)
+
+    def test_backoff_before_success(self, sleep_mock, time_mock):
+        self.run_loop(5, 500, 501, 502, 203, 504)
+        self.check_backoff(sleep_mock, 3)
+
+    def test_backoff_before_permfail(self, sleep_mock, time_mock):
+        self.run_loop(5, 500, 501, 502, 403, 504)
+        self.check_backoff(sleep_mock, 3)
+
+    def test_backoff_all_tempfail(self, sleep_mock, time_mock):
+        self.run_loop(3, 500, 501, 502, 503, 504)
+        self.check_backoff(sleep_mock, 3)
+
+    def test_backoff_multiplier(self, sleep_mock, time_mock):
+        self.run_loop(5, 500, 501, 502, 503, 504, 505,
+                      backoff_start=5, backoff_growth=10)
+        self.check_backoff(sleep_mock, 5, 9)
+
+
+if __name__ == '__main__':
+    unittest.main()