"""Utilities to retry operations.

The core of this module is `RetryLoop`, a utility class to retry operations
that might fail. It can distinguish between temporary and permanent failures;
provide exponential backoff; and save a series of results.

It also provides utility functions for common operations with `RetryLoop`:

* `check_http_response_success` can be used as a `RetryLoop` `success_check`
  for HTTP response codes from the Arvados API server.
* `retry_method` can decorate methods to provide a default `num_retries`
  keyword argument.
"""
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
from builtins import range
from builtins import object
import functools
import time

from collections import deque

import arvados.errors
29 _HTTP_SUCCESSES = set(range(200, 300))
30 _HTTP_CAN_RETRY = set([408, 409, 423, 500, 502, 503, 504])
class RetryLoop(object):
    """Coordinate limited retries of code.

    `RetryLoop` coordinates a loop that runs until it records a
    successful result or tries too many times, whichever comes first.
    Typical use looks like:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()

    Arguments:

    num_retries: int
    : The maximum number of times to retry the loop if it
      doesn't succeed.  This means the loop body could run at most
      `num_retries + 1` times.

    success_check: Callable
    : This is a function that will be called each
      time the loop saves a result.  The function should return
      `True` if the result indicates the code succeeded, `False` if it
      represents a permanent failure, and `None` if it represents a
      temporary failure.  If no function is provided, the loop will
      end after any result is saved.

    backoff_start: float
    : The number of seconds that must pass before the loop's second
      iteration.  Default 0, which disables all waiting.

    backoff_growth: float
    : The wait time multiplier after each iteration.
      Default 2 (i.e., double the wait time each time).

    save_results: int
    : Specify a number to store that many saved results from the loop.
      These are available through the `results` attribute, oldest first.
      Default 1.

    max_wait: float
    : Maximum number of seconds to wait between retries. Default 60.
    """
    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        self.tries_left = num_retries + 1
        self.check_result = success_check
        self.backoff_wait = backoff_start
        self.backoff_growth = backoff_growth
        self.max_wait = max_wait
        # Zero means "no earliest start time yet", so the first
        # iteration never sleeps.
        self.next_start_time = 0
        # A bounded deque discards the oldest result once full.
        self.results = deque(maxlen=save_results)
        self._attempts = 0
        # None until the first iteration, then True while looping and
        # False once the loop has stopped.
        self._running = None
        # Most recent verdict returned by the success check.
        self._success = None

    def __iter__(self):
        """Return an iterator of retries."""
        return self

    def running(self):
        """Return whether this loop is running.

        Returns `None` if the loop has never run, `True` if it is still running,
        or `False` if it has stopped—whether that's because it has saved a
        successful result, a permanent failure, or has run out of retries.
        """
        return self._running and (self._success is None)

    def __next__(self):
        """Record a loop attempt.

        If the loop is still running, decrements the number of tries left and
        returns it.  Otherwise, raises `StopIteration`.
        """
        if self._running is None:
            self._running = True
        if (self.tries_left < 1) or not self.running():
            self._running = False
            raise StopIteration
        else:
            # Sleep out whatever remains of the previous backoff window.
            wait_time = max(0, self.next_start_time - time.time())
            time.sleep(wait_time)
            # NOTE(review): the wait is multiplied before the next start
            # time is scheduled, so the delay before the second iteration
            # looks like backoff_start * backoff_growth rather than
            # backoff_start -- confirm against the documented behavior.
            self.backoff_wait *= self.backoff_growth
            if self.backoff_wait > self.max_wait:
                self.backoff_wait = self.max_wait
        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Record a loop result.

        Save the given result, and end the loop if it indicates
        success or permanent failure.  See documentation for the `__init__`
        `success_check` argument to learn how that's indicated.

        Raises `arvados.errors.AssertionError` if called after the loop has
        finished.

        Arguments:

        result: Any
        : The result from this loop attempt to check and save.
        """
        if not self.running():
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Return the loop's end state.

        Returns `True` if the loop recorded a successful result, `False` if it
        recorded permanent failure, or else `None`.
        """
        return self._success

    def last_result(self):
        """Return the most recent result the loop saved.

        Raises `arvados.errors.AssertionError` if called before any result has
        been saved.
        """
        try:
            return self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")

    def attempts(self):
        """Return the number of results that have been saved.

        This count includes all kinds of results: success, permanent failure,
        and temporary failure.
        """
        return self._attempts

    def attempts_str(self):
        """Return a human-friendly string counting saved results.

        This method returns '1 attempt' or 'N attempts', where the number
        in the string is the number of saved results.
        """
        if self._attempts == 1:
            return '1 attempt'
        else:
            return '{} attempts'.format(self._attempts)
def check_http_response_success(status_code):
    """Convert a numeric HTTP status code to a loop control flag.

    This method takes a numeric HTTP status code and returns `True` if
    the code indicates success, `None` if it indicates temporary
    failure, and `False` otherwise.  You can use this as the
    `success_check` for a `RetryLoop` that queries the Arvados API server.
    Specifically:

    * Any 2xx result returns `True`.

    * A select few status codes, or any malformed responses, return `None`.

    * Everything else returns `False`.  Note that this includes 1xx and
      3xx status codes.  They don't indicate success, and you can't
      retry those requests verbatim.

    Arguments:

    status_code: int
    : A numeric HTTP response code
    """
    if status_code in _HTTP_SUCCESSES:
        return True
    elif status_code in _HTTP_CAN_RETRY:
        return None
    elif 100 <= status_code < 600:
        # A well-formed code that is neither success nor retryable.
        return False
    else:
        return None   # Get well soon, server.
def retry_method(orig_func):
    """Provide a default value for a method's num_retries argument.

    This is a decorator for instance and class methods that accept a
    `num_retries` keyword argument, with a `None` default.  When the method
    is called without a value for `num_retries`, this decorator will set it
    from the `num_retries` attribute of the underlying instance or class.

    Only the keyword form is inspected: a call that passes `num_retries=None`
    explicitly is treated the same as one that omits it.

    Arguments:

    orig_func: Callable
    : A class or instance method that accepts a `num_retries` keyword argument
    """
    @functools.wraps(orig_func)
    def num_retries_setter(self, *args, **kwargs):
        # Fall back to the owner's default when the caller gave no value.
        if kwargs.get('num_retries') is None:
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)
    return num_retries_setter