X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/443f3228eb4c56849f77ae9c421dd1cc6fdbc5f1..44c95f99098fa6c6acbfa82d4b6cbc6015eb6e39:/services/nodemanager/arvnodeman/computenode/__init__.py?ds=sidebyside diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py index 54d6a82bce..8a4e5f312b 100644 --- a/services/nodemanager/arvnodeman/computenode/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/__init__.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 from __future__ import absolute_import, print_function @@ -8,6 +11,9 @@ import itertools import re import time +from ..config import CLOUD_ERRORS +from libcloud.common.exceptions import BaseHTTPError + ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ' ARVADOS_TIMESUBSEC_RE = re.compile(r'(\.\d+)Z$') @@ -70,40 +76,66 @@ class RetryMixin(object): @functools.wraps(orig_func) def retry_wrapper(self, *args, **kwargs): while True: + should_retry = False try: ret = orig_func(self, *args, **kwargs) + except BaseHTTPError as error: + if error.headers and error.headers.get("retry-after"): + try: + self.retry_wait = int(error.headers["retry-after"]) + if self.retry_wait < 0 or self.retry_wait > self.max_retry_wait: + self.retry_wait = self.max_retry_wait + should_retry = True + except ValueError: + pass + if error.code == 429 or error.code >= 500: + should_retry = True + except CLOUD_ERRORS as error: + should_retry = True + except errors as error: + should_retry = True except Exception as error: - if not (isinstance(error, errors) or - self._cloud.is_cloud_exception(error)): - self.retry_wait = self.min_retry_wait - self._logger.warning( - "Re-raising unknown error (no retry): %s", - error, exc_info=error) - raise - - self._logger.warning( - "Client error: %s - waiting %s seconds", - error, self.retry_wait, exc_info=error) - - if self._timer: - start_time = time.time() - # reschedule to be called again - self._timer.schedule(start_time + self.retry_wait, - getattr(self._later, - orig_func.__name__), - *args, **kwargs) - else: - # sleep on it. - time.sleep(self.retry_wait) - - self.retry_wait = min(self.retry_wait * 2, - self.max_retry_wait) - if self._timer: - # expect to be called again by timer so don't loop - return + # As a libcloud workaround for drivers that don't use + # typed exceptions, consider bare Exception() objects + # retryable. + should_retry = type(error) is Exception else: + # No exception, self.retry_wait = self.min_retry_wait return ret + + # Only got here if an exception was caught. Now determine what to do about it. + if not should_retry: + self.retry_wait = self.min_retry_wait + self._logger.warning( + "Re-raising error (no retry): %s", + error, exc_info=error) + raise + + self._logger.warning( + "Client error: %s - %s %s seconds", + error, + "scheduling retry in" if self._timer else "sleeping", + self.retry_wait, + exc_info=error) + + if self._timer: + start_time = time.time() + # reschedule to be called again + self._timer.schedule(start_time + self.retry_wait, + getattr(self._later, + orig_func.__name__), + *args, **kwargs) + else: + # sleep on it. + time.sleep(self.retry_wait) + + self.retry_wait = min(self.retry_wait * 2, + self.max_retry_wait) + if self._timer: + # expect to be called again by timer so don't loop + return + return retry_wrapper return decorator