1 """Utilities to retry operations.

The core of this module is `RetryLoop`, a utility class to retry operations
that might fail. It can distinguish between temporary and permanent failures;
provide exponential backoff; and save a series of results.

It also provides utility functions for common operations with `RetryLoop`:

* `check_http_response_success` can be used as a `RetryLoop` `success_check`
  for HTTP response codes from the Arvados API server.
* `retry_method` can decorate methods to provide a default `num_retries`
  keyword argument.
"""
14 # Copyright (C) The Arvados Authors. All rights reserved.
16 # SPDX-License-Identifier: Apache-2.0
18 from builtins import range
19 from builtins import object
25 from collections import deque
# Every 2xx status code counts as a successful response.
_HTTP_SUCCESSES = set(range(200, 300))
# Status codes that `check_http_response_success` reports as temporary
# failures, i.e. the same request may be retried.
_HTTP_CAN_RETRY = {408, 409, 423, 500, 502, 503, 504}
class RetryLoop(object):
    """Coordinate limited retries of code.

    `RetryLoop` coordinates a loop that runs until it records a
    successful result or tries too many times, whichever comes first.
    Typical use looks like:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()

    Arguments:

    * num_retries: int --- The maximum number of times to retry the loop if
      it doesn't succeed.  This means the loop body could run at most
      `num_retries + 1` times.

    * success_check: Callable --- This is a function that will be called
      each time the loop saves a result.  The function should return `True`
      if the result indicates the code succeeded, `False` if it represents a
      permanent failure, and `None` if it represents a temporary failure.
      If no function is provided, the loop will end after any result is
      saved.

    * backoff_start: float --- The number of seconds that must pass before
      the loop's second iteration.  Default 0, which disables all waiting.

    * backoff_growth: float --- The wait time multiplier after each
      iteration.  Default 2 (i.e., double the wait time each time).

    * save_results: int --- Specify a number to store that many saved
      results from the loop.  These are available through the `results`
      attribute, oldest first.  Default 1.

    * max_wait: float --- Maximum number of seconds to wait between
      attempts.  Default 60.
    """
    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        self.tries_left = num_retries + 1
        self.check_result = success_check
        self.backoff_wait = backoff_start
        self.backoff_growth = backoff_growth
        # NOTE(review): the default of 60 seconds was restored from the
        # upstream API; confirm it against the project's history.
        self.max_wait = max_wait
        # Zero is always in the past, so the first iteration never waits.
        self.next_start_time = 0
        # Bounded history: once full, the oldest result is discarded.
        self.results = deque(maxlen=save_results)
        self._attempts = 0
        # None until iteration begins, True while looping, False once stopped.
        self._running = None
        # Verdict of `check_result` for the most recently saved result.
        self._success = None

    def __iter__(self):
        """Return an iterator of retries."""
        return self

    def running(self):
        """Return whether this loop is running.

        Returns `None` if the loop has never run, `True` if it is still running,
        or `False` if it has stopped—whether that's because it has saved a
        successful result, a permanent failure, or has run out of retries.
        """
        return self._running and (self._success is None)

    def __next__(self):
        """Record a loop attempt.

        If the loop is still running, decrements the number of tries left and
        returns it.  Otherwise, raises `StopIteration`.
        """
        if self._running is None:
            self._running = True
        if (self.tries_left < 1) or not self.running():
            self._running = False
            raise StopIteration
        # Sleep out whatever is left of the current backoff window.
        wait_time = max(0, self.next_start_time - time.time())
        time.sleep(wait_time)
        # Grow the delay for the following attempt, capped at `max_wait`.
        # NOTE(review): the multiplier is applied before the first window is
        # scheduled, so the first real wait is
        # `backoff_start * backoff_growth`, not `backoff_start`.
        self.backoff_wait = min(self.backoff_wait * self.backoff_growth,
                                self.max_wait)
        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Record a loop result.

        Save the given result, and end the loop if it indicates
        success or permanent failure.  See documentation for the `__init__`
        `success_check` argument to learn how that's indicated.

        Raises `arvados.errors.AssertionError` if called after the loop has
        finished.

        Arguments:

        * result: Any --- The result from this loop attempt to check and
          save.
        """
        if not self.running():
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Return the loop's end state.

        Returns `True` if the loop recorded a successful result, `False` if it
        recorded permanent failure, or else `None`.
        """
        return self._success

    def last_result(self):
        """Return the most recent result the loop saved.

        Raises `arvados.errors.AssertionError` if called before any result has
        been saved.
        """
        try:
            return self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")

    def attempts(self):
        """Return the number of results that have been saved.

        This count includes all kinds of results: success, permanent failure,
        and temporary failure.
        """
        return self._attempts

    def attempts_str(self):
        """Return a human-friendly string counting saved results.

        This method returns '1 attempt' or 'N attempts', where the number
        in the string is the number of saved results.
        """
        if self._attempts == 1:
            return '1 attempt'
        return '{} attempts'.format(self._attempts)
def check_http_response_success(status_code):
    """Convert a numeric HTTP status code to a loop control flag.

    This method takes a numeric HTTP status code and returns `True` if
    the code indicates success, `None` if it indicates temporary
    failure, and `False` otherwise.  You can use this as the
    `success_check` for a `RetryLoop` that queries the Arvados API server.
    Specifically:

    * Any 2xx result returns `True`.

    * A select few status codes, or any malformed responses, return `None`.

    * Everything else returns `False`.  Note that this includes 1xx and
      3xx status codes.  They don't indicate success, and you can't
      retry those requests verbatim.

    Arguments:

    * status_code: int --- A numeric HTTP response code
    """
    try:
        if status_code in _HTTP_SUCCESSES:
            return True
        elif status_code in _HTTP_CAN_RETRY:
            return None
        elif 100 <= status_code < 600:
            return False
    except TypeError:
        # An unhashable or non-numeric status (e.g. `None` on Python 3) can't
        # be looked up or range-compared.  Treat it as a malformed response,
        # which the contract above says is a temporary failure.
        return None
    return None   # Get well soon, server.
def retry_method(orig_func):
    """Provide a default value for a method's num_retries argument.

    This is a decorator for instance and class methods that accept a
    `num_retries` keyword argument, with a `None` default.  When the method
    is called without a value for `num_retries`, this decorator will set it
    from the `num_retries` attribute of the underlying instance or class.

    Arguments:

    * orig_func: Callable --- A class or instance method that accepts a
      `num_retries` keyword argument
    """
    @functools.wraps(orig_func)
    def num_retries_setter(self, *args, **kwargs):
        # Both an omitted `num_retries` and an explicit `None` fall back to
        # the default carried by the instance or class.
        if kwargs.get('num_retries') is None:
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)
    return num_retries_setter