-#!/usr/bin/env python
+"""Utilities to retry operations.
+
+The core of this module is `RetryLoop`, a utility class to retry operations
+that might fail. It can distinguish between temporary and permanent failures;
+provide exponential backoff; and save a series of results.
+
+It also provides utility functions for common operations with `RetryLoop`:
+
+* `check_http_response_success` can be used as a `RetryLoop` `success_check`
+ for HTTP response codes from the Arvados API server.
+* `retry_method` can decorate methods to provide a default `num_retries`
+ keyword argument.
+"""
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from builtins import range
+from builtins import object
import functools
import inspect
+import pycurl
import time
from collections import deque
import arvados.errors
-_HTTP_SUCCESSES = set(xrange(200, 300))
-_HTTP_CAN_RETRY = set([408, 409, 422, 423, 500, 502, 503, 504])
+_HTTP_SUCCESSES = set(range(200, 300))
+_HTTP_CAN_RETRY = set([408, 409, 423, 500, 502, 503, 504])
class RetryLoop(object):
"""Coordinate limited retries of code.
- RetryLoop coordinates a loop that runs until it records a
+ `RetryLoop` coordinates a loop that runs until it records a
successful result or tries too many times, whichever comes first.
Typical use looks like:
loop.save_result(result)
if loop.success():
return loop.last_result()
+
+ Arguments:
+
+ * num_retries: int --- The maximum number of times to retry the loop if
+ it doesn't succeed. This means the loop body could run at most
+ `num_retries + 1` times.
+
+ * success_check: Callable --- This is a function that will be called
+ each time the loop saves a result. The function should return `True`
+ if the result indicates the code succeeded, `False` if it represents a
+ permanent failure, and `None` if it represents a temporary failure.
+ If no function is provided, the loop will end after any result is
+ saved.
+
+ * backoff_start: float --- The number of seconds that must pass before
+ the loop's second iteration. Default 0, which disables all waiting.
+
+ * backoff_growth: float --- The wait time multiplier after each
+ iteration. Default 2 (i.e., double the wait time each time).
+
+ * save_results: int --- The number of most recently saved results to
+ keep. These are available through the `results` attribute, oldest
+ first. Default 1.
+
+ * max_wait: float --- Maximum number of seconds to wait between
+ retries. Default 60.
"""
def __init__(self, num_retries, success_check=lambda r: True,
- backoff_start=0, backoff_growth=2, save_results=1):
- """Construct a new RetryLoop.
-
- Arguments:
- * num_retries: The maximum number of times to retry the loop if it
- doesn't succeed. This means the loop could run at most 1+N times.
- * success_check: This is a function that will be called each
- time the loop saves a result. The function should return
- True if the result indicates loop success, False if it
- represents a permanent failure state, and None if the loop
- should continue. If no function is provided, the loop will
- end as soon as it records any result.
- * backoff_start: The number of seconds that must pass before the
- loop's second iteration. Default 0, which disables all waiting.
- * backoff_growth: The wait time multiplier after each iteration.
- Default 2 (i.e., double the wait time each time).
- * save_results: Specify a number to save the last N results
- that the loop recorded. These records are available through
- the results attribute, oldest first. Default 1.
- """
+ backoff_start=0, backoff_growth=2, save_results=1,
+ max_wait=60):
self.tries_left = num_retries + 1
self.check_result = success_check
self.backoff_wait = backoff_start
self.backoff_growth = backoff_growth
+ self.max_wait = max_wait
self.next_start_time = 0
self.results = deque(maxlen=save_results)
+ self._attempts = 0
self._running = None
self._success = None
def __iter__(self):
+ """Return an iterator of retries."""
return self
def running(self):
+ """Return whether this loop is running.
+
+ Returns `None` if the loop has never run, `True` if it is still running,
+ or `False` if it has stopped—whether that's because it has saved a
+ successful result, a permanent failure, or has run out of retries.
+ """
return self._running and (self._success is None)
- def next(self):
+ def __next__(self):
+ """Record a loop attempt.
+
+ If the loop is still running, waits out any remaining backoff time,
+ decrements the number of tries left, and returns it. Otherwise, raises
+ `StopIteration`.
+ """
if self._running is None:
self._running = True
if (self.tries_left < 1) or not self.running():
wait_time = max(0, self.next_start_time - time.time())
time.sleep(wait_time)
self.backoff_wait *= self.backoff_growth
+ if self.backoff_wait > self.max_wait:
+ self.backoff_wait = self.max_wait
self.next_start_time = time.time() + self.backoff_wait
self.tries_left -= 1
return self.tries_left
"""Record a loop result.
Save the given result, and end the loop if it indicates
- success or permanent failure. See __init__'s documentation
- about success_check to learn how to make that indication.
+ success or permanent failure. See the `success_check` argument in the
+ `RetryLoop` class documentation to learn how that's indicated.
+
+ Raises `arvados.errors.AssertionError` if called after the loop has
+ already ended.
+
+ Arguments:
+
+ * result: Any --- The result from this loop attempt to check and
+ save.
"""
if not self.running():
raise arvados.errors.AssertionError(
"recorded a loop result after the loop finished")
self.results.append(result)
self._success = self.check_result(result)
+ self._attempts += 1
def success(self):
"""Return the loop's end state.
- Returns True if the loop obtained a successful result, False if it
- encountered permanent failure, or else None.
+ Returns `True` if the loop recorded a successful result, `False` if it
+ recorded permanent failure, or else `None`.
"""
return self._success
def last_result(self):
- """Return the most recent result the loop recorded."""
+ """Return the most recent result the loop saved.
+
+ Raises `arvados.errors.AssertionError` if called before any result has
+ been saved.
+ """
try:
return self.results[-1]
except IndexError:
raise arvados.errors.AssertionError(
"queried loop results before any were recorded")
+ def attempts(self):
+ """Return the number of results that have been saved.
+
+ This count includes all kinds of results: success, permanent failure,
+ and temporary failure.
+ """
+ return self._attempts
+
+ def attempts_str(self):
+ """Return a human-friendly string counting saved results.
+
+ This method returns '1 attempt' or 'N attempts', where the number
+ in the string is the number of saved results.
+ """
+ if self._attempts == 1:
+ return '1 attempt'
+ else:
+ return '{} attempts'.format(self._attempts)
+
+
+def check_http_response_success(status_code):
+ """Convert a numeric HTTP status code to a loop control flag.
-def check_http_response_success(result):
- """Convert an httplib2 request result to a loop control flag.
+ This function takes a numeric HTTP status code and returns `True` if
+ the code indicates success, `None` if it indicates temporary
+ failure, and `False` otherwise. You can use this as the
+ `success_check` for a `RetryLoop` that queries the Arvados API server.
+ Specifically:
- Pass this method the 2-tuple returned by httplib2.Http.request. It
- returns True if the response indicates success, None if it indicates
- temporary failure, and False otherwise. You can use this as the
- success_check for a RetryLoop.
+ * Any 2xx result returns `True`.
- Implementation details:
- * Any 2xx result returns True.
- * A select few status codes, or any malformed responses, return None.
- 422 Unprocessable Entity is in this category. This may not meet the
- letter of the HTTP specification, but the Arvados API server will
- use it for various server-side problems like database connection
- errors.
- * Everything else returns False. Note that this includes 1xx and
+ * A select few status codes, or any code outside the 100-599 range,
+ return `None`.
+
+ * Everything else returns `False`. Note that this includes 1xx and
3xx status codes. They don't indicate success, and you can't
retry those requests verbatim.
+
+ Arguments:
+
+ * status_code: int --- A numeric HTTP response code
"""
- try:
- status = int(result[0].status)
- except Exception:
- return None
- if status in _HTTP_SUCCESSES:
+ if status_code in _HTTP_SUCCESSES:
return True
- elif status in _HTTP_CAN_RETRY:
+ elif status_code in _HTTP_CAN_RETRY:
return None
- elif 100 <= status < 600:
+ elif 100 <= status_code < 600:
return False
else:
return None # Get well soon, server.
"""Provide a default value for a method's num_retries argument.
This is a decorator for instance and class methods that accept a
- num_retries argument, with a None default. When the method is called
- without a value for num_retries, it will be set from the underlying
- instance or class' num_retries attribute.
+ `num_retries` keyword argument, with a `None` default. When the method
+ is called without a value for `num_retries`, this decorator will set it
+ from the `num_retries` attribute of the underlying instance or class.
+
+ Arguments:
+
+ * orig_func: Callable --- A class or instance method that accepts a
+ `num_retries` keyword argument
"""
@functools.wraps(orig_func)
def num_retries_setter(self, *args, **kwargs):
- arg_vals = inspect.getcallargs(orig_func, self, *args, **kwargs)
- if arg_vals['num_retries'] is None:
+ if kwargs.get('num_retries') is None:
kwargs['num_retries'] = self.num_retries
return orig_func(self, *args, **kwargs)
return num_retries_setter