X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/bc4992f9e11cd0639cf6461736963d66830c5af8..09cbdc3074b3f1e69c9c537875146f6da0a6ed8f:/sdk/python/arvados/retry.py diff --git a/sdk/python/arvados/retry.py b/sdk/python/arvados/retry.py index 52a68faa6f..e93624a5d1 100644 --- a/sdk/python/arvados/retry.py +++ b/sdk/python/arvados/retry.py @@ -1,20 +1,38 @@ -#!/usr/bin/env python +"""Utilities to retry operations. +The core of this module is `RetryLoop`, a utility class to retry operations +that might fail. It can distinguish between temporary and permanent failures; +provide exponential backoff; and save a series of results. + +It also provides utility functions for common operations with `RetryLoop`: + +* `check_http_response_success` can be used as a `RetryLoop` `success_check` + for HTTP response codes from the Arvados API server. +* `retry_method` can decorate methods to provide a default `num_retries` + keyword argument. +""" +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from builtins import range +from builtins import object import functools import inspect +import pycurl import time from collections import deque import arvados.errors -_HTTP_SUCCESSES = set(xrange(200, 300)) +_HTTP_SUCCESSES = set(range(200, 300)) _HTTP_CAN_RETRY = set([408, 409, 422, 423, 500, 502, 503, 504]) class RetryLoop(object): """Coordinate limited retries of code. - RetryLoop coordinates a loop that runs until it records a + `RetryLoop` coordinates a loop that runs until it records a successful result or tries too many times, whichever comes first. Typical use looks like: @@ -28,44 +46,71 @@ class RetryLoop(object): loop.save_result(result) if loop.success(): return loop.last_result() + + Arguments: + + num_retries: int + : The maximum number of times to retry the loop if it + doesn't succeed. This means the loop body could run at most + `num_retries + 1` times. + + success_check: Callable + : This is a function that will be called each + time the loop saves a result. The function should return + `True` if the result indicates the code succeeded, `False` if it + represents a permanent failure, and `None` if it represents a + temporary failure. If no function is provided, the loop will + end after any result is saved. + + backoff_start: float + : The number of seconds that must pass before the loop's second + iteration. Default 0, which disables all waiting. + + backoff_growth: float + : The wait time multiplier after each iteration. + Default 2 (i.e., double the wait time each time). + + save_results: int + : Specify a number to store that many saved results from the loop. + These are available through the `results` attribute, oldest first. + Default 1. + + max_wait: float + : Maximum number of seconds to wait between retries. Default 60. """ def __init__(self, num_retries, success_check=lambda r: True, - backoff_start=0, backoff_growth=2, save_results=1): - """Construct a new RetryLoop. - - Arguments: - * num_retries: The maximum number of times to retry the loop if it - doesn't succeed. This means the loop could run at most 1+N times. - * success_check: This is a function that will be called each - time the loop saves a result. The function should return - True if the result indicates loop success, False if it - represents a permanent failure state, and None if the loop - should continue. If no function is provided, the loop will - end as soon as it records any result. - * backoff_start: The number of seconds that must pass before the - loop's second iteration. Default 0, which disables all waiting. - * backoff_growth: The wait time multiplier after each iteration. - Default 2 (i.e., double the wait time each time). - * save_results: Specify a number to save the last N results - that the loop recorded. These records are available through - the results attribute, oldest first. Default 1. - """ + backoff_start=0, backoff_growth=2, save_results=1, + max_wait=60): self.tries_left = num_retries + 1 self.check_result = success_check self.backoff_wait = backoff_start self.backoff_growth = backoff_growth + self.max_wait = max_wait self.next_start_time = 0 self.results = deque(maxlen=save_results) + self._attempts = 0 self._running = None self._success = None def __iter__(self): + """Return an iterator of retries.""" return self def running(self): + """Return whether this loop is running. + + Returns `None` if the loop has never run, `True` if it is still running, + or `False` if it has stopped—whether that's because it has saved a + successful result, a permanent failure, or has run out of retries. + """ return self._running and (self._success is None) - def next(self): + def __next__(self): + """Record a loop attempt. + + If the loop is still running, decrements the number of tries left and + returns it. Otherwise, raises `StopIteration`. + """ if self._running is None: self._running = True if (self.tries_left < 1) or not self.running(): @@ -75,6 +120,8 @@ class RetryLoop(object): wait_time = max(0, self.next_start_time - time.time()) time.sleep(wait_time) self.backoff_wait *= self.backoff_growth + if self.backoff_wait > self.max_wait: + self.backoff_wait = self.max_wait self.next_start_time = time.time() + self.backoff_wait self.tries_left -= 1 return self.tries_left @@ -83,60 +130,95 @@ class RetryLoop(object): """Record a loop result. Save the given result, and end the loop if it indicates - success or permanent failure. See __init__'s documentation - about success_check to learn how to make that indication. + success or permanent failure. See documentation for the `__init__` + `success_check` argument to learn how that's indicated. + + Raises `arvados.errors.AssertionError` if called after the loop has + already ended. + + Arguments: + + result: Any + : The result from this loop attempt to check and save. """ if not self.running(): raise arvados.errors.AssertionError( "recorded a loop result after the loop finished") self.results.append(result) self._success = self.check_result(result) + self._attempts += 1 def success(self): """Return the loop's end state. - Returns True if the loop obtained a successful result, False if it - encountered permanent failure, or else None. + Returns `True` if the loop recorded a successful result, `False` if it + recorded permanent failure, or else `None`. """ return self._success def last_result(self): - """Return the most recent result the loop recorded.""" + """Return the most recent result the loop saved. + + Raises `arvados.errors.AssertionError` if called before any result has + been saved. + """ try: return self.results[-1] except IndexError: raise arvados.errors.AssertionError( "queried loop results before any were recorded") + def attempts(self): + """Return the number of results that have been saved. + + This count includes all kinds of results: success, permanent failure, + and temporary failure. + """ + return self._attempts + + def attempts_str(self): + """Return a human-friendly string counting saved results. + + This method returns '1 attempt' or 'N attempts', where the number + in the string is the number of saved results. + """ + if self._attempts == 1: + return '1 attempt' + else: + return '{} attempts'.format(self._attempts) + + +def check_http_response_success(status_code): + """Convert a numeric HTTP status code to a loop control flag. -def check_http_response_success(result): - """Convert a 'requests' response to a loop control flag. + This method takes a numeric HTTP status code and returns `True` if + the code indicates success, `None` if it indicates temporary + failure, and `False` otherwise. You can use this as the + `success_check` for a `RetryLoop` that queries the Arvados API server. + Specifically: - Pass this method a requests.Response object. It returns True if - the response indicates success, None if it indicates temporary - failure, and False otherwise. You can use this as the - success_check for a RetryLoop. + * Any 2xx result returns `True`. - Implementation details: - * Any 2xx result returns True. - * A select few status codes, or any malformed responses, return None. + * A select few status codes, or any malformed responses, return `None`. 422 Unprocessable Entity is in this category. This may not meet the letter of the HTTP specification, but the Arvados API server will use it for various server-side problems like database connection errors. - * Everything else returns False. Note that this includes 1xx and + + * Everything else returns `False`. Note that this includes 1xx and 3xx status codes. They don't indicate success, and you can't retry those requests verbatim. + + Arguments: + + status_code: int + : A numeric HTTP response code """ - try: - status = result.status_code - except Exception: - return None - if status in _HTTP_SUCCESSES: + if status_code in _HTTP_SUCCESSES: return True - elif status in _HTTP_CAN_RETRY: + elif status_code in _HTTP_CAN_RETRY: return None - elif 100 <= status < 600: + elif 100 <= status_code < 600: return False else: return None # Get well soon, server. @@ -145,14 +227,18 @@ def retry_method(orig_func): """Provide a default value for a method's num_retries argument. This is a decorator for instance and class methods that accept a - num_retries argument, with a None default. When the method is called - without a value for num_retries, it will be set from the underlying - instance or class' num_retries attribute. + `num_retries` keyword argument, with a `None` default. When the method + is called without a value for `num_retries`, this decorator will set it + from the `num_retries` attribute of the underlying instance or class. + + Arguments: + + orig_func: Callable + : A class or instance method that accepts a `num_retries` keyword argument """ @functools.wraps(orig_func) def num_retries_setter(self, *args, **kwargs): - arg_vals = inspect.getcallargs(orig_func, self, *args, **kwargs) - if arg_vals['num_retries'] is None: + if kwargs.get('num_retries') is None: kwargs['num_retries'] = self.num_retries return orig_func(self, *args, **kwargs) return num_retries_setter