"""Utilities to retry operations.

The core of this module is `RetryLoop`, a utility class to retry operations
that might fail.  It can distinguish between temporary and permanent failures;
provide exponential backoff; and save a series of results.

It also provides utility functions for common operations with `RetryLoop`:

* `check_http_response_success` can be used as a `RetryLoop` `success_check`
  for HTTP response codes from the Arvados API server.
* `retry_method` can decorate methods to provide a default `num_retries`
  keyword argument.
"""
14 # Copyright (C) The Arvados Authors. All rights reserved.
16 # SPDX-License-Identifier: Apache-2.0
18 from builtins import range
19 from builtins import object
25 from collections import deque
# Status codes that check_http_response_success reports as success (any 2xx).
_HTTP_SUCCESSES = set(range(200, 300))
# Status codes that check_http_response_success reports as a temporary
# failure, i.e. the request may be retried.
_HTTP_CAN_RETRY = {408, 409, 422, 423, 500, 502, 503, 504}
class RetryLoop(object):
    """Coordinate limited retries of code.

    `RetryLoop` coordinates a loop that runs until it records a
    successful result or tries too many times, whichever comes first.
    Typical use looks like:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()

    Arguments:

    num_retries: int
    : The maximum number of times to retry the loop if it
      doesn't succeed.  This means the loop body could run at most
      `num_retries + 1` times.

    success_check: Callable
    : This is a function that will be called each
      time the loop saves a result.  The function should return
      `True` if the result indicates the code succeeded, `False` if it
      represents a permanent failure, and `None` if it represents a
      temporary failure.  If no function is provided, the loop will
      end after any result is saved.

    backoff_start: float
    : The starting value, in seconds, for the wait between iterations.
      It is multiplied by `backoff_growth` (and capped at `max_wait`)
      before each wait is scheduled, including the first one.
      Default 0, which disables all waiting.

    backoff_growth: float
    : The wait time multiplier after each iteration.
      Default 2 (i.e., double the wait time each time).

    save_results: int
    : Specify a number to store that many saved results from the loop.
      These are available through the `results` attribute, oldest first.
      Default 1.

    max_wait: float
    : Maximum number of seconds to wait between retries. Default 60.
    """
    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        self.tries_left = num_retries + 1
        self.check_result = success_check
        self.backoff_wait = backoff_start
        self.backoff_growth = backoff_growth
        self.max_wait = max_wait
        # Earliest wall-clock time the next iteration may start; 0 means
        # the first iteration never waits.
        self.next_start_time = 0
        # Bounded history: once full, the oldest result is discarded.
        self.results = deque(maxlen=save_results)
        self._attempts = 0
        # None until the first iteration, then True while looping and
        # False once the loop has stopped.
        self._running = None
        # Latest verdict from check_result; None means "not decided yet".
        self._success = None

    def __iter__(self):
        """Return an iterator of retries."""
        return self

    def running(self):
        """Return whether this loop is running.

        Returns `None` if the loop has never run, `True` if it is still running,
        or `False` if it has stopped—whether that's because it has saved a
        successful result, a permanent failure, or has run out of retries.
        """
        return self._running and (self._success is None)

    def __next__(self):
        """Record a loop attempt.

        If the loop is still running, decrements the number of tries left and
        returns it.  Otherwise, raises `StopIteration`.
        """
        if self._running is None:
            self._running = True
        if (self.tries_left < 1) or not self.running():
            self._running = False
            raise StopIteration
        else:
            # Sleep out whatever remains of the previously scheduled wait.
            wait_time = max(0, self.next_start_time - time.time())
            time.sleep(wait_time)
            # Grow the wait for the following iteration, capped at max_wait.
            self.backoff_wait *= self.backoff_growth
            if self.backoff_wait > self.max_wait:
                self.backoff_wait = self.max_wait
        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Record a loop result.

        Save the given result, and end the loop if it indicates
        success or permanent failure.  See documentation for the `__init__`
        `success_check` argument to learn how that's indicated.

        Raises `arvados.errors.AssertionError` if called after the loop has
        already ended.

        Arguments:

        result: Any
        : The result from this loop attempt to check and save.
        """
        if not self.running():
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Return the loop's end state.

        Returns `True` if the loop recorded a successful result, `False` if it
        recorded permanent failure, or else `None`.
        """
        return self._success

    def last_result(self):
        """Return the most recent result the loop saved.

        Raises `arvados.errors.AssertionError` if called before any result has
        been saved.
        """
        try:
            return self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")

    def attempts(self):
        """Return the number of results that have been saved.

        This count includes all kinds of results: success, permanent failure,
        and temporary failure.
        """
        return self._attempts

    def attempts_str(self):
        """Return a human-friendly string counting saved results.

        This method returns '1 attempt' or 'N attempts', where the number
        in the string is the number of saved results.
        """
        if self._attempts == 1:
            return '1 attempt'
        else:
            return '{} attempts'.format(self._attempts)
def check_http_response_success(status_code):
    """Convert a numeric HTTP status code to a loop control flag.

    This method takes a numeric HTTP status code and returns `True` if
    the code indicates success, `None` if it indicates temporary
    failure, and `False` otherwise.  You can use this as the
    `success_check` for a `RetryLoop` that queries the Arvados API server.
    Specifically:

    * Any 2xx result returns `True`.

    * A select few status codes, or any malformed responses, return `None`.
      422 Unprocessable Entity is in this category.  This may not meet the
      letter of the HTTP specification, but the Arvados API server will
      use it for various server-side problems like database connection
      errors.

    * Everything else returns `False`.  Note that this includes 1xx and
      3xx status codes.  They don't indicate success, and you can't
      retry those requests verbatim.

    Arguments:

    status_code: int
    : A numeric HTTP response code
    """
    try:
        if status_code in _HTTP_SUCCESSES:
            return True
        elif status_code in _HTTP_CAN_RETRY:
            return None
        elif 100 <= status_code < 600:
            return False
    except TypeError:
        # A non-numeric status (e.g. None or a string) cannot be ordered
        # against integers on Python 3, and an unhashable one cannot be
        # looked up in a set.  Either way the response is malformed, which
        # is a temporary failure rather than a crash.
        pass
    return None   # Get well soon, server.
def retry_method(orig_func):
    """Provide a default value for a method's num_retries argument.

    This is a decorator for instance and class methods that accept a
    `num_retries` keyword argument, with a `None` default.  When the method
    is called without a value for `num_retries`, this decorator will set it
    from the `num_retries` attribute of the underlying instance or class.

    Arguments:

    orig_func: Callable
    : A class or instance method that accepts a `num_retries` keyword argument
    """
    @functools.wraps(orig_func)
    def num_retries_setter(self, *args, **kwargs):
        # An explicit num_retries=None is treated the same as omitting it.
        # Only the keyword form is inspected; positional arguments are
        # passed through untouched.
        if kwargs.get('num_retries') is None:
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)
    return num_retries_setter