X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1b75afdd0c278d34c1a99ed41814eb5119a254a9..2b7d05cdf4e054024607b859bd6fb41e04855bfa:/sdk/python/arvados/retry.py diff --git a/sdk/python/arvados/retry.py b/sdk/python/arvados/retry.py index ea4095930f..e93624a5d1 100644 --- a/sdk/python/arvados/retry.py +++ b/sdk/python/arvados/retry.py @@ -1,3 +1,16 @@ +"""Utilities to retry operations. + +The core of this module is `RetryLoop`, a utility class to retry operations +that might fail. It can distinguish between temporary and permanent failures; +provide exponential backoff; and save a series of results. + +It also provides utility functions for common operations with `RetryLoop`: + +* `check_http_response_success` can be used as a `RetryLoop` `success_check` + for HTTP response codes from the Arvados API server. +* `retry_method` can decorate methods to provide a default `num_retries` + keyword argument. +""" # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -19,7 +32,7 @@ _HTTP_CAN_RETRY = set([408, 409, 422, 423, 500, 502, 503, 504]) class RetryLoop(object): """Coordinate limited retries of code. - RetryLoop coordinates a loop that runs until it records a + `RetryLoop` coordinates a loop that runs until it records a successful result or tries too many times, whichever comes first. Typical use looks like: @@ -33,30 +46,41 @@ class RetryLoop(object): loop.save_result(result) if loop.success(): return loop.last_result() + + Arguments: + + num_retries: int + : The maximum number of times to retry the loop if it + doesn't succeed. This means the loop body could run at most + `num_retries + 1` times. + + success_check: Callable + : This is a function that will be called each + time the loop saves a result. The function should return + `True` if the result indicates the code succeeded, `False` if it + represents a permanent failure, and `None` if it represents a + temporary failure. If no function is provided, the loop will + end after any result is saved. + + backoff_start: float + : The number of seconds that must pass before the loop's second + iteration. Default 0, which disables all waiting. + + backoff_growth: float + : The wait time multiplier after each iteration. + Default 2 (i.e., double the wait time each time). + + save_results: int + : Specify a number to store that many saved results from the loop. + These are available through the `results` attribute, oldest first. + Default 1. + + max_wait: float + : Maximum number of seconds to wait between retries. Default 60. """ def __init__(self, num_retries, success_check=lambda r: True, backoff_start=0, backoff_growth=2, save_results=1, max_wait=60): - """Construct a new RetryLoop. - - Arguments: - * num_retries: The maximum number of times to retry the loop if it - doesn't succeed. This means the loop could run at most 1+N times. - * success_check: This is a function that will be called each - time the loop saves a result. The function should return - True if the result indicates loop success, False if it - represents a permanent failure state, and None if the loop - should continue. If no function is provided, the loop will - end as soon as it records any result. - * backoff_start: The number of seconds that must pass before the - loop's second iteration. Default 0, which disables all waiting. - * backoff_growth: The wait time multiplier after each iteration. - Default 2 (i.e., double the wait time each time). - * save_results: Specify a number to save the last N results - that the loop recorded. These records are available through - the results attribute, oldest first. Default 1. - * max_wait: Maximum number of seconds to wait between retries. - """ self.tries_left = num_retries + 1 self.check_result = success_check self.backoff_wait = backoff_start @@ -69,12 +93,24 @@ class RetryLoop(object): self._success = None def __iter__(self): + """Return an iterator of retries.""" return self def running(self): + """Return whether this loop is running. + + Returns `None` if the loop has never run, `True` if it is still running, + or `False` if it has stopped—whether that's because it has saved a + successful result, a permanent failure, or has run out of retries. + """ return self._running and (self._success is None) def __next__(self): + """Record a loop attempt. + + If the loop is still running, decrements the number of tries left and + returns it. Otherwise, raises `StopIteration`. + """ if self._running is None: self._running = True if (self.tries_left < 1) or not self.running(): @@ -94,8 +130,16 @@ class RetryLoop(object): """Record a loop result. Save the given result, and end the loop if it indicates - success or permanent failure. See __init__'s documentation - about success_check to learn how to make that indication. + success or permanent failure. See documentation for the `__init__` + `success_check` argument to learn how that's indicated. + + Raises `arvados.errors.AssertionError` if called after the loop has + already ended. + + Arguments: + + result: Any + : The result from this loop attempt to check and save. """ if not self.running(): raise arvados.errors.AssertionError( @@ -107,13 +151,17 @@ class RetryLoop(object): def success(self): """Return the loop's end state. - Returns True if the loop obtained a successful result, False if it - encountered permanent failure, or else None. + Returns `True` if the loop recorded a successful result, `False` if it + recorded permanent failure, or else `None`. """ return self._success def last_result(self): - """Return the most recent result the loop recorded.""" + """Return the most recent result the loop saved. + + Raises `arvados.errors.AssertionError` if called before any result has + been saved. + """ try: return self.results[-1] except IndexError: @@ -121,13 +169,19 @@ class RetryLoop(object): "queried loop results before any were recorded") def attempts(self): - """Return the number of attempts that have been made. + """Return the number of results that have been saved. - Includes successes and failures.""" + This count includes all kinds of results: success, permanent failure, + and temporary failure. + """ return self._attempts def attempts_str(self): - """Human-readable attempts(): 'N attempts' or '1 attempt'""" + """Return a human-friendly string counting saved results. + + This method returns '1 attempt' or 'N attempts', where the number + in the string is the number of saved results. + """ if self._attempts == 1: return '1 attempt' else: @@ -135,23 +189,30 @@ class RetryLoop(object): def check_http_response_success(status_code): - """Convert an HTTP status code to a loop control flag. + """Convert a numeric HTTP status code to a loop control flag. - Pass this method a numeric HTTP status code. It returns True if - the code indicates success, None if it indicates temporary - failure, and False otherwise. You can use this as the - success_check for a RetryLoop. + This method takes a numeric HTTP status code and returns `True` if + the code indicates success, `None` if it indicates temporary + failure, and `False` otherwise. You can use this as the + `success_check` for a `RetryLoop` that queries the Arvados API server. + Specifically: - Implementation details: - * Any 2xx result returns True. - * A select few status codes, or any malformed responses, return None. + * Any 2xx result returns `True`. + + * A select few status codes, or any malformed responses, return `None`. 422 Unprocessable Entity is in this category. This may not meet the letter of the HTTP specification, but the Arvados API server will use it for various server-side problems like database connection errors. - * Everything else returns False. Note that this includes 1xx and + + * Everything else returns `False`. Note that this includes 1xx and 3xx status codes. They don't indicate success, and you can't retry those requests verbatim. + + Arguments: + + status_code: int + : A numeric HTTP response code """ if status_code in _HTTP_SUCCESSES: return True @@ -166,9 +227,14 @@ def retry_method(orig_func): """Provide a default value for a method's num_retries argument. This is a decorator for instance and class methods that accept a - num_retries argument, with a None default. When the method is called - without a value for num_retries, it will be set from the underlying - instance or class' num_retries attribute. + `num_retries` keyword argument, with a `None` default. When the method + is called without a value for `num_retries`, this decorator will set it + from the `num_retries` attribute of the underlying instance or class. + + Arguments: + + orig_func: Callable + : A class or instance method that accepts a `num_retries` keyword argument """ @functools.wraps(orig_func) def num_retries_setter(self, *args, **kwargs):