1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import absolute_import
6 from future import standard_library
7 standard_library.install_aliases()
8 from builtins import str
9 from builtins import object
13 from .retry import RetryLoop
23 from ws4py.client.threadedclient import WebSocketClient
25 _logger = logging.getLogger('arvados.events')
28 class _EventClient(WebSocketClient):
29 def __init__(self, url, filters, on_event, last_log_id, on_closed):
30 ssl_options = {'ca_certs': arvados.util.ca_certs_path()}
31 if config.flag_is_true('ARVADOS_API_HOST_INSECURE'):
32 ssl_options['cert_reqs'] = ssl.CERT_NONE
34 ssl_options['cert_reqs'] = ssl.CERT_REQUIRED
36 # Warning: If the host part of url resolves to both IPv6 and
37 # IPv4 addresses (common with "localhost"), only one of them
38 # will be attempted -- and it might not be the right one. See
39 # ws4py's WebSocketBaseClient.__init__.
40 super(_EventClient, self).__init__(url, ssl_options=ssl_options)
42 self.filters = filters
43 self.on_event = on_event
44 self.last_log_id = last_log_id
45 self._closing_lock = threading.RLock()
47 self._closed = threading.Event()
48 self.on_closed = on_closed
51 for f in self.filters:
52 self.subscribe(f, self.last_log_id)
54 def closed(self, code, reason=None):
58 def received_message(self, m):
59 with self._closing_lock:
61 self.on_event(json.loads(str(m)))
63 def close(self, code=1000, reason='', timeout=0):
64 """Close event client and optionally wait for it to finish.
66 :timeout: is the number of seconds to wait for ws4py to
67 indicate that the connection has closed.
69 super(_EventClient, self).close(code, reason)
70 with self._closing_lock:
71 # make sure we don't process any more messages.
73 # wait for ws4py to tell us the connection is closed.
74 self._closed.wait(timeout=timeout)
76 def subscribe(self, f, last_log_id=None):
77 m = {"method": "subscribe", "filters": f}
78 if last_log_id is not None:
79 m["last_log_id"] = last_log_id
80 self.send(json.dumps(m))
82 def unsubscribe(self, f):
83 self.send(json.dumps({"method": "unsubscribe", "filters": f}))
86 class EventClient(object):
87 def __init__(self, url, filters, on_event_cb, last_log_id):
90 self.filters = [filters]
93 self.on_event_cb = on_event_cb
94 self.last_log_id = last_log_id
95 self.is_closed = threading.Event()
96 self._setup_event_client()
98 def _setup_event_client(self):
99 self.ec = _EventClient(self.url, self.filters, self.on_event,
100 self.last_log_id, self.on_closed)
101 self.ec.daemon = True
105 self.ec.close_connection()
108 def subscribe(self, f, last_log_id=None):
109 self.filters.append(f)
110 self.ec.subscribe(f, last_log_id)
112 def unsubscribe(self, f):
113 del self.filters[self.filters.index(f)]
114 self.ec.unsubscribe(f)
116 def close(self, code=1000, reason='', timeout=0):
118 self.ec.close(code, reason, timeout)
120 def on_event(self, m):
121 if m.get('id') != None:
122 self.last_log_id = m.get('id')
125 except Exception as e:
126 _logger.exception("Unexpected exception from event callback.")
127 _thread.interrupt_main()
130 if not self.is_closed.is_set():
131 _logger.warning("Unexpected close. Reconnecting.")
132 for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15):
134 self._setup_event_client()
135 _logger.warning("Reconnect successful.")
137 except Exception as e:
138 _logger.warning("Error '%s' during websocket reconnect.", e)
140 _logger.exception("EventClient thread could not contact websocket server.")
142 _thread.interrupt_main()
145 def run_forever(self):
146 # Have to poll here to let KeyboardInterrupt get raised.
147 while not self.is_closed.wait(1):
151 class PollClient(threading.Thread):
152 def __init__(self, api, filters, on_event, poll_time, last_log_id):
153 super(PollClient, self).__init__()
156 self.filters = [filters]
159 self.on_event = on_event
160 self.poll_time = poll_time
162 self.last_log_id = last_log_id
163 self._closing = threading.Event()
164 self._closing_lock = threading.RLock()
166 if self.last_log_id != None:
167 # Caller supplied the last-seen event ID from a previous
169 self._skip_old_events = [["id", ">", str(self.last_log_id)]]
171 # We need to do a reverse-order query to find the most
172 # recent event ID (see "if not self._skip_old_events"
174 self._skip_old_events = False
177 self.on_event({'status': 200})
179 while not self._closing.is_set():
181 for f in self.filters:
182 for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time):
184 if not self._skip_old_events:
185 # If the caller didn't provide a known
186 # recent ID, our first request will ask
187 # for the single most recent event from
188 # the last 2 hours (the time restriction
189 # avoids doing an expensive database
190 # query, and leaves a big enough margin to
191 # account for clock skew). If we do find a
192 # recent event, we remember its ID but
193 # then discard it (we are supposed to be
194 # returning new/current events, not old
197 # Subsequent requests will get multiple
198 # events in chronological order, and
199 # filter on that same cutoff time, or
200 # (once we see our first matching event)
201 # the ID of the last-seen event.
203 # Note: self._skip_old_events must not be
204 # set until the threshold is decided.
205 # Otherwise, tests will be unreliable.
209 "%Y-%m-%dT%H:%M:%SZ",
210 time.gmtime(time.time()-7200))]]
211 items = self.api.logs().list(
214 filters=f+filter_by_time).execute()
216 self._skip_old_events = [
217 ["id", ">", str(items["items"][0]["id"])]]
220 "items_available": 0,
223 # No recent events. We can keep using
224 # the same timestamp threshold until
225 # we receive our first new event.
226 self._skip_old_events = filter_by_time
228 # In this case, either we know the most
229 # recent matching ID, or we know there
230 # were no matching events in the 2-hour
231 # window before subscribing. Either way we
232 # can safely ask for events in ascending
234 items = self.api.logs().list(
236 filters=f+self._skip_old_events).execute()
238 except errors.ApiError as error:
244 _logger.exception("PollClient thread could not contact API server.")
245 with self._closing_lock:
247 _thread.interrupt_main()
249 for i in items["items"]:
250 self._skip_old_events = [["id", ">", str(i["id"])]]
251 with self._closing_lock:
252 if self._closing.is_set():
256 except Exception as e:
257 _logger.exception("Unexpected exception from event callback.")
258 _thread.interrupt_main()
259 if items["items_available"] > len(items["items"]):
262 self._closing.wait(self.poll_time)
264 def run_forever(self):
265 # Have to poll here, otherwise KeyboardInterrupt will never get processed.
266 while not self._closing.is_set():
267 self._closing.wait(1)
269 def close(self, code=None, reason=None, timeout=0):
270 """Close poll client and optionally wait for it to finish.
272 If an :on_event: handler is running in a different thread,
273 first wait (indefinitely) for it to return.
275 After closing, wait up to :timeout: seconds for the thread to
276 finish the poll request in progress (if any).
278 :code: and :reason: are ignored. They are present for
279 interface compatibility with EventClient.
282 with self._closing_lock:
285 self.join(timeout=timeout)
287 # "join() raises a RuntimeError if an attempt is made to join the
288 # current thread as that would cause a deadlock. It is also an
289 # error to join() a thread before it has been started and attempts
290 # to do so raises the same exception."
293 def subscribe(self, f):
294 self.on_event({'status': 200})
295 self.filters.append(f)
297 def unsubscribe(self, f):
298 del self.filters[self.filters.index(f)]
301 def _subscribe_websocket(api, filters, on_event, last_log_id=None):
302 endpoint = api._rootDesc.get('websocketUrl', None)
304 raise errors.FeatureNotEnabledError(
305 "Server does not advertise a websocket endpoint")
306 uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
308 client = EventClient(uri_with_token, filters, on_event, last_log_id)
310 _logger.warning("Failed to connect to websockets on %s" % endpoint)
316 def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
319 a client object retrieved from arvados.api(). The caller should not use this client object for anything else after calling subscribe().
321 Initial subscription filters.
323 The callback when a message is received.
325 If websockets are not available, fall back to polling every N seconds. If poll_fallback=False, this will return None if websockets are not available.
327 Log rows that are newer than the log id
330 if not poll_fallback:
331 return _subscribe_websocket(api, filters, on_event, last_log_id)
334 if not config.flag_is_true('ARVADOS_DISABLE_WEBSOCKETS'):
335 return _subscribe_websocket(api, filters, on_event, last_log_id)
337 _logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true")
338 except Exception as e:
339 _logger.warning("Falling back to polling after websocket error: %s" % e)
340 p = PollClient(api, filters, on_event, poll_fallback, last_log_id)