8784: Fix test for latest firefox.
[arvados.git] / services / nodemanager / arvnodeman / computenode / __init__.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import, print_function
4
5 import calendar
6 import functools
7 import itertools
8 import re
9 import time
10
11 from ..config import CLOUD_ERRORS
12 from libcloud.common.exceptions import BaseHTTPError
13
# strptime()/strftime() format for an Arvados timestamp without fractional
# seconds, e.g. '2015-01-02T03:04:05Z'.
ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
# Matches a fractional-seconds part (such as '.123') sitting immediately
# before the trailing 'Z' of a timestamp string.  Group 1 is the fraction,
# including its leading dot.
ARVADOS_TIMESUBSEC_RE = re.compile(r'(\.\d+)Z$')
16
def arvados_node_fqdn(arvados_node, default_hostname='dynamic.compute'):
    """Build the fully qualified domain name for an Arvados node record.

    The record's 'hostname' is used when it is present and truthy;
    otherwise default_hostname stands in for it.  The record's 'domain'
    is required (a missing key raises KeyError).
    """
    host = arvados_node.get('hostname')
    if not host:
        host = default_hostname
    domain = arvados_node['domain']
    return '{}.{}'.format(host, domain)
20
def arvados_node_mtime(node):
    """Return the node record's 'modified_at' time, converted by
    arvados_timestamp()."""
    modified_at = node['modified_at']
    return arvados_timestamp(modified_at)
23
def arvados_timestamp(timestr):
    """Convert an Arvados timestamp string to seconds since the Unix epoch.

    timestr must match ARVADOS_TIMEFMT, optionally with fractional seconds
    right before the trailing 'Z' (for example '2015-01-02T03:04:05.678Z').
    The time is interpreted as UTC.

    Returns a float: the whole seconds computed by calendar.timegm() plus
    any fractional seconds found in the string.  Raises ValueError (from
    time.strptime) when timestr does not match the expected format.
    """
    subsec_match = ARVADOS_TIMESUBSEC_RE.search(timestr)
    if subsec_match is None:
        subsecs = .0
    else:
        subsecs = float(subsec_match.group(1))
        # ARVADOS_TIMEFMT has no directive for the fraction, so cut it out
        # of the string and put the trailing 'Z' back before parsing.
        timestr = timestr[:subsec_match.start()] + 'Z'
    # calendar.timegm() treats the parsed struct_time as UTC.  The extra
    # 'UTC' / '%Z' pair is kept from the original implementation.
    whole_secs = calendar.timegm(time.strptime(timestr + 'UTC',
                                               ARVADOS_TIMEFMT + '%Z'))
    # Add back the sub-second part; without this the parsed fraction would
    # be silently discarded.
    return whole_secs + subsecs
33
def timestamp_fresh(timestamp, fresh_time):
    """Tell whether timestamp is less than fresh_time seconds old.

    The age is measured against the current time.time().
    """
    age = time.time() - timestamp
    return age < fresh_time
36
def arvados_node_missing(arvados_node, fresh_time):
    """Report whether the cloud node behind an Arvados node record is
    "missing".

    Returns True when the record's last ping is at least fresh_time seconds
    old (the node has not pinged the API server within the timeout period),
    False when the ping is up to date, and None when the node has never
    pinged at all.
    """
    last_ping = arvados_node["last_ping_at"]
    if last_ping is None:
        return None
    ping_time = arvados_timestamp(last_ping)
    return not timestamp_fresh(ping_time, fresh_time)
49
class RetryMixin(object):
    """Mixin providing a retry decorator for methods that make remote requests.

    Decorate a method with RetryMixin._retry(), passing in a tuple of
    exception types to catch.  If the decorated method raises a known cloud
    driver error (CLOUD_ERRORS), one of the given exception types, a bare
    Exception, or a libcloud BaseHTTPError that has status 429, a status of
    500 or above, or an integer "retry-after" header, the call is retried
    with exponential backoff: either by sleeping and looping (if self._timer
    is not set) or by scheduling another call of the method through
    self._timer (described by the original authors as a timer actor).

    Any other exception is logged and re-raised.
    """
    def __init__(self, retry_wait, max_retry_wait,
                 logger, cloud, timer=None):
        """Store the retry configuration.

        retry_wait is both the initial and the minimum wait in seconds;
        max_retry_wait caps the backoff.  logger receives the warnings
        emitted by the retry wrapper.  cloud is only stored here (as
        self._cloud); nothing in this class reads it.  timer, when given,
        switches retries from sleeping to scheduling.
        """
        self.min_retry_wait = retry_wait
        self.max_retry_wait = max_retry_wait
        self.retry_wait = retry_wait
        self._logger = logger
        self._cloud = cloud
        self._timer = timer

    @staticmethod
    def _retry(errors=()):
        """Return a decorator that retries a method on retryable errors.

        errors is a tuple of extra exception types to treat as retryable.
        The decorated method must belong to an object with the attributes
        set up by RetryMixin.__init__; in timer mode the object must also
        provide self._later, which is not defined in this class
        (presumably supplied by the class mixing this in -- TODO confirm).
        """
        def decorator(orig_func):
            @functools.wraps(orig_func)
            def retry_wrapper(self, *args, **kwargs):
                # NOTE(review): the code after the try statement reads
                # `error` and uses a bare `raise`.  That appears to rely on
                # Python 2 semantics, where the `as` name and the active
                # exception outlive the except clause -- confirm before
                # porting to Python 3.
                while True:
                    should_retry = False
                    try:
                        ret = orig_func(self, *args, **kwargs)
                    except BaseHTTPError as error:
                        # Honor a server-provided "retry-after" header when
                        # it parses as an integer number of seconds.
                        if error.headers and error.headers.get("retry-after"):
                            try:
                                self.retry_wait = int(error.headers["retry-after"])
                                # Out-of-range values are replaced by the
                                # configured maximum.
                                if self.retry_wait < 0 or self.retry_wait > self.max_retry_wait:
                                    self.retry_wait = self.max_retry_wait
                                should_retry = True
                            except ValueError:
                                # Header is not an integer: ignore it and
                                # fall back to the status-code check below.
                                pass
                        # 429 and all 5xx-and-above statuses are retried.
                        if error.code == 429 or error.code >= 500:
                            should_retry = True
                    except CLOUD_ERRORS as error:
                        should_retry = True
                    except errors as error:
                        should_retry = True
                    except Exception as error:
                        # As a libcloud workaround for drivers that don't use
                        # typed exceptions, consider bare Exception() objects
                        # retryable.  Subclasses of Exception are not.
                        should_retry = type(error) is Exception
                    else:
                        # No exception: reset the backoff to its minimum and
                        # hand the result back to the caller.
                        self.retry_wait = self.min_retry_wait
                        return ret

                    # Only got here if an exception was caught.  Now determine what to do about it.
                    if not should_retry:
                        # Not retryable: reset the backoff, log, and
                        # re-raise the exception that was just caught.
                        self.retry_wait = self.min_retry_wait
                        self._logger.warning(
                            "Re-raising error (no retry): %s",
                            error, exc_info=error)
                        raise

                    self._logger.warning(
                        "Client error: %s - %s %s seconds",
                        error,
                        "scheduling retry in" if self._timer else "sleeping",
                        self.retry_wait,
                        exc_info=error)

                    if self._timer:
                        start_time = time.time()
                        # Reschedule to be called again: look the method up
                        # by name on self._later and schedule it with the
                        # same arguments once the wait has elapsed.
                        self._timer.schedule(start_time + self.retry_wait,
                                             getattr(self._later,
                                                     orig_func.__name__),
                                             *args, **kwargs)
                    else:
                        # No timer: block this thread for the wait period.
                        time.sleep(self.retry_wait)

                    # Exponential backoff: double the wait for the next
                    # failure, capped at max_retry_wait.
                    self.retry_wait = min(self.retry_wait * 2,
                                          self.max_retry_wait)
                    if self._timer:
                        # Expect to be called again by the timer, so don't
                        # loop.  The wrapper returns None in this case.
                        return

            return retry_wrapper
        return decorator
138
class ShutdownTimer(object):
    """Keep track of a cloud node's shutdown windows.

    Instantiate this class with a timestamp of when a cloud node started,
    and a list of durations (in minutes) of when the node must not and may
    be shut down, alternating.  The class will tell you when a shutdown
    window is open, and when the next open window will start.

    shutdown_windows must hold at least two durations (one closed, one
    open); an empty or single-item sequence raises IndexError.
    """
    def __init__(self, start_time, shutdown_windows):
        # The implementation is easiest if we have an even number of windows,
        # because then windows always alternate between open and closed.
        # Rig that up: calculate the first shutdown window based on what's
        # passed in.  Then, if we were given an odd number of windows, merge
        # that first window into the last one, since they both represent
        # closed state.
        first_window = shutdown_windows[0]
        shutdown_windows = list(shutdown_windows[1:])
        self._next_opening = start_time + (60 * first_window)
        if len(shutdown_windows) % 2:
            shutdown_windows.append(first_window)
        else:
            shutdown_windows[-1] += first_window
        # Endless stream of durations in seconds: open, closed, open, ...
        self.shutdown_windows = itertools.cycle([60 * n
                                                 for n in shutdown_windows])
        # Duration of the open window that starts at self._next_opening.
        self._next_open_for = next(self.shutdown_windows)
        # Start and duration of the most recently reached open window.
        # Until the first opening is reached these describe that first
        # (still future) window, so window_open() reports False.
        self._open_start = self._next_opening
        self._open_for = self._next_open_for

    def _advance_opening(self):
        """Move the window bookkeeping forward to the current time.

        Afterwards self._open_start / self._open_for describe the latest
        open window that has already started, and self._next_opening /
        self._next_open_for describe the first one that has not.
        """
        while self._next_opening < time.time():
            # The pending window has started; it becomes the current one.
            # Its own duration must travel with its start time, otherwise
            # window_open() would measure it with the following window's
            # length whenever open windows differ in duration.
            self._open_start = self._next_opening
            self._open_for = self._next_open_for
            # Skip over this open window and the closed window after it.
            self._next_opening += self._open_for + next(self.shutdown_windows)
            self._next_open_for = next(self.shutdown_windows)

    def next_opening(self):
        """Return the timestamp at which the next open window starts."""
        self._advance_opening()
        return self._next_opening

    def window_open(self):
        """Return True if the current time is inside an open window."""
        self._advance_opening()
        return 0 < (time.time() - self._open_start) < self._open_for