1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from builtins import range
6 from builtins import object
12 from collections import deque
# Every 2xx status code counts as a successful response.
_HTTP_SUCCESSES = set(range(200, 300))
# Status codes for which sending the same request again is worthwhile.
_HTTP_CAN_RETRY = {408, 409, 422, 423, 500, 502, 503, 504}
class RetryLoop(object):
    """Iterator that drives a bounded number of attempts at an operation.

    Iterating over a RetryLoop yields the number of tries remaining.
    Iteration stops once a saved result is judged conclusive by the
    success check, or once the allotted tries are used up.  A typical
    caller looks like this:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()
    """

    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        """Set up a loop that has not started iterating yet.

        Arguments:
        * num_retries: How many times the loop may be retried after the
          first attempt, so the body runs at most num_retries + 1 times.
        * success_check: Callable applied to every saved result.  Return
          True for success, False for permanent failure, or None to keep
          looping.  The default accepts any result, so the loop ends as
          soon as one is saved.
        * backoff_start: Initial wait, in seconds, used to pace
          iterations.  The default of 0 disables waiting altogether.
        * backoff_growth: Factor the wait is multiplied by on each
          iteration.  Default 2.
        * save_results: How many of the most recent results to keep in
          the results attribute (oldest first).  Default 1.
        * max_wait: Upper bound, in seconds, on the wait between
          iterations.  Default 60.
        """
        # Loop state: nothing has run and nothing has been recorded.
        self._running = None
        self._success = None
        self._attempts = 0
        self.results = deque(maxlen=save_results)
        self.tries_left = num_retries + 1
        self.check_result = success_check
        # Backoff configuration and schedule.
        self.backoff_growth = backoff_growth
        self.max_wait = max_wait
        self.backoff_wait = backoff_start
        self.next_start_time = 0

    def __iter__(self):
        """Return the loop itself; it is its own iterator."""
        return self

    def running(self):
        """Tell whether the loop has started and has no verdict yet.

        The value is falsy (None or False) when the loop has not started
        or has been stopped, and otherwise True exactly when no saved
        result has produced a verdict.
        """
        if not self._running:
            return self._running
        return self._success is None

    def __next__(self):
        """Wait out any pending backoff, then return the tries remaining.

        Raises StopIteration once the tries are exhausted or a saved
        result has ended the loop.
        """
        if self._running is None:
            # First call: the loop is now under way.
            self._running = True
        if (self.tries_left < 1) or not self.running():
            self._running = False
            raise StopIteration
        pause = max(0, self.next_start_time - time.time())
        time.sleep(pause)
        # Grow the wait for the following iteration, capped at max_wait.
        self.backoff_wait = min(self.backoff_wait * self.backoff_growth,
                                self.max_wait)
        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Store a result and let the success check judge it.

        The result is appended to the results attribute and passed to
        the success_check callable given to the constructor; a verdict
        of True or False ends the loop.  Raises
        arvados.errors.AssertionError if the loop is not running.
        """
        if not self.running():
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Report the verdict on the most recently saved result.

        True means a successful result was recorded, False means a
        permanent failure was recorded, and None means neither.
        """
        return self._success

    def last_result(self):
        """Return the newest saved result.

        Raises arvados.errors.AssertionError when no result is stored.
        """
        try:
            newest = self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")
        return newest

    def attempts(self):
        """Return how many results have been saved so far.

        Successful and failed attempts are both counted.
        """
        return self._attempts

    def attempts_str(self):
        """Describe attempts() in words: '1 attempt' or 'N attempts'."""
        count = self._attempts
        return '1 attempt' if count == 1 else '{} attempts'.format(count)
def check_http_response_success(status_code):
    """Translate an HTTP status code into a RetryLoop control value.

    Returns True when the code signals success, None when the request
    is worth retrying, and False when it failed for good, which makes
    this function usable as a RetryLoop success_check.

    How codes are classified:
    * Every 2xx code yields True.
    * Codes listed in _HTTP_CAN_RETRY, and numbers outside the 100-599
      range, yield None.  422 Unprocessable Entity is on the retry list
      even though that stretches the HTTP specification, because the
      Arvados API server reports server-side problems such as database
      connection problems with it.
    * Any other code yields False.  That covers 1xx and 3xx codes:
      they are not successes, and the same request cannot simply be
      sent again.
    """
    if status_code in _HTTP_SUCCESSES:
        return True
    retryable = status_code in _HTTP_CAN_RETRY
    if not retryable and 100 <= status_code < 600:
        # A well-formed status that is neither a success nor retryable.
        return False
    # Either explicitly retryable, or not a recognizable HTTP status.
    return None
def retry_method(orig_func):
    """Decorator that fills in a missing num_retries keyword argument.

    Apply it to instance or class methods that take a num_retries
    argument defaulting to None.  Whenever such a method is called
    with num_retries absent from the keyword arguments, or given as
    None, the wrapper substitutes the num_retries attribute of the
    object the method was called on.
    """
    def num_retries_setter(self, *args, **kwargs):
        # Read self.num_retries only when the caller did not supply a value.
        if kwargs.get('num_retries') is None:
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)

    return functools.wraps(orig_func)(num_retries_setter)