X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0eb72b526bf8bbb011551ecf019f604e17a534f1..c3b26754a231ec909506f2ff28af1af9f2e27f2b:/services/nodemanager/arvnodeman/computenode/__init__.py diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py index 8a4e5f312b..b124c66540 100644 --- a/services/nodemanager/arvnodeman/computenode/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/__init__.py @@ -12,7 +12,8 @@ import re import time from ..config import CLOUD_ERRORS -from libcloud.common.exceptions import BaseHTTPError +from ..status import tracker +from libcloud.common.exceptions import BaseHTTPError, RateLimitReachedError ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ' ARVADOS_TIMESUBSEC_RE = re.compile(r'(\.\d+)Z$') @@ -32,7 +33,7 @@ def arvados_timestamp(timestr): subsecs = float(subsec_match.group(1)) timestr = timestr[:subsec_match.start()] + 'Z' return calendar.timegm(time.strptime(timestr + 'UTC', - ARVADOS_TIMEFMT + '%Z')) + ARVADOS_TIMEFMT + '%Z')) + subsecs def timestamp_fresh(timestamp, fresh_time): return (time.time() - timestamp) < fresh_time @@ -61,10 +62,9 @@ class RetryMixin(object): is a timer actor.) """ - def __init__(self, retry_wait, max_retry_wait, - logger, cloud, timer=None): - self.min_retry_wait = retry_wait - self.max_retry_wait = max_retry_wait + def __init__(self, retry_wait, max_retry_wait, logger, cloud, timer=None): + self.min_retry_wait = max(1, retry_wait) + self.max_retry_wait = max(self.min_retry_wait, max_retry_wait) self.retry_wait = retry_wait self._logger = logger self._cloud = cloud @@ -79,18 +79,30 @@ class RetryMixin(object): should_retry = False try: ret = orig_func(self, *args, **kwargs) + except RateLimitReachedError as error: + # If retry-after is zero, continue with exponential + # backoff. + if error.retry_after != 0: + self.retry_wait = error.retry_after + should_retry = True except BaseHTTPError as error: if error.headers and error.headers.get("retry-after"): try: - self.retry_wait = int(error.headers["retry-after"]) - if self.retry_wait < 0 or self.retry_wait > self.max_retry_wait: - self.retry_wait = self.max_retry_wait + retry_after = int(error.headers["retry-after"]) + # If retry-after is zero, continue with + # exponential backoff. + if retry_after != 0: + self.retry_wait = retry_after should_retry = True except ValueError: - pass + self._logger.warning( + "Unrecognizable Retry-After header: %r", + error.headers["retry-after"], + exc_info=error) if error.code == 429 or error.code >= 500: should_retry = True except CLOUD_ERRORS as error: + tracker.counter_add('cloud_errors') should_retry = True except errors as error: should_retry = True @@ -98,9 +110,11 @@ class RetryMixin(object): # As a libcloud workaround for drivers that don't use # typed exceptions, consider bare Exception() objects # retryable. - should_retry = type(error) is Exception + if type(error) is Exception: + tracker.counter_add('cloud_errors') + should_retry = True else: - # No exception, + # No exception self.retry_wait = self.min_retry_wait return ret @@ -112,6 +126,12 @@ class RetryMixin(object): error, exc_info=error) raise + # Retry wait out of bounds? + if self.retry_wait < self.min_retry_wait: + self.retry_wait = self.min_retry_wait + elif self.retry_wait > self.max_retry_wait: + self.retry_wait = self.max_retry_wait + self._logger.warning( "Client error: %s - %s %s seconds", error,