X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4186924dbc103d080c47ae85bcfcd38d87314767..b5dc4ae38eb6da852e167952675aff59987a995c:/sdk/python/arvados/events.py diff --git a/sdk/python/arvados/events.py b/sdk/python/arvados/events.py index 94b8a9d06c..698601cb5d 100644 --- a/sdk/python/arvados/events.py +++ b/sdk/python/arvados/events.py @@ -14,8 +14,8 @@ from ws4py.client.threadedclient import WebSocketClient _logger = logging.getLogger('arvados.events') -class EventClient(WebSocketClient): - def __init__(self, url, filters, on_event, last_log_id): +class _EventClient(WebSocketClient): + def __init__(self, url, filters, on_event, last_log_id, on_closed): ssl_options = {'ca_certs': arvados.util.ca_certs_path()} if config.flag_is_true('ARVADOS_API_HOST_INSECURE'): ssl_options['cert_reqs'] = ssl.CERT_NONE @@ -26,19 +26,23 @@ class EventClient(WebSocketClient): # IPv4 addresses (common with "localhost"), only one of them # will be attempted -- and it might not be the right one. See # ws4py's WebSocketBaseClient.__init__. - super(EventClient, self).__init__(url, ssl_options=ssl_options) + super(_EventClient, self).__init__(url, ssl_options=ssl_options) + self.filters = filters self.on_event = on_event self.last_log_id = last_log_id self._closing_lock = threading.RLock() self._closing = False self._closed = threading.Event() + self.on_closed = on_closed def opened(self): - self.subscribe(self.filters, self.last_log_id) + for f in self.filters: + self.subscribe(f, self.last_log_id) def closed(self, code, reason=None): self._closed.set() + self.on_closed() def received_message(self, m): with self._closing_lock: @@ -51,21 +55,72 @@ class EventClient(WebSocketClient): :timeout: is the number of seconds to wait for ws4py to indicate that the connection has closed. """ - super(EventClient, self).close(code, reason) + super(_EventClient, self).close(code, reason) with self._closing_lock: # make sure we don't process any more messages. self._closing = True # wait for ws4py to tell us the connection is closed. self._closed.wait(timeout=timeout) - def subscribe(self, filters, last_log_id=None): - m = {"method": "subscribe", "filters": filters} + def subscribe(self, f, last_log_id=None): + m = {"method": "subscribe", "filters": f} if last_log_id is not None: m["last_log_id"] = last_log_id self.send(json.dumps(m)) - def unsubscribe(self, filters): - self.send(json.dumps({"method": "unsubscribe", "filters": filters})) + def unsubscribe(self, f): + self.send(json.dumps({"method": "unsubscribe", "filters": f})) + + +class EventClient(object): + def __init__(self, url, filters, on_event_cb, last_log_id): + self.url = url + if filters: + self.filters = [filters] + else: + self.filters = [[]] + self.on_event_cb = on_event_cb + self.last_log_id = last_log_id + self.is_closed = False + self.ec = _EventClient(url, self.filters, self.on_event, last_log_id, self.on_closed) + + def connect(self): + self.ec.connect() + + def close_connection(self): + self.ec.close_connection() + + def subscribe(self, f, last_log_id=None): + self.filters.append(f) + self.ec.subscribe(f, last_log_id) + + def unsubscribe(self, f): + del self.filters[self.filters.index(f)] + self.ec.unsubscribe(f) + + def close(self, code=1000, reason='', timeout=0): + self.is_closed = True + self.ec.close(code, reason, timeout) + + def on_event(self, m): + if m.get('id') != None: + self.last_log_id = m.get('id') + try: + self.on_event_cb(m) + except Exception as e: + _logger.warn("Unexpected exception from event callback.", exc_info=e) + + def on_closed(self): + if self.is_closed == False: + _logger.warn("Unexpected close. Reconnecting.") + self.ec = _EventClient(self.url, self.filters, self.on_event, self.last_log_id, self.on_closed) + while True: + try: + self.ec.connect() + break + except Exception as e: + _logger.warn("Error '%s' during websocket reconnect. Will retry after 5s.", e, exc_info=e) + time.sleep(5) class PollClient(threading.Thread): @@ -89,7 +144,15 @@ class PollClient(threading.Thread): self.id = self.last_log_id else: for f in self.filters: - items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items'] + try: + items = self.api.logs().list(limit=1, order="id desc", filters=f).execute(num_retries=1000000)['items'] + except Exception as e: + # Some apparently non-retryable error happened, so log the + # error and shut down gracefully. + _logger.error("Got exception from log query: %s", e) + with self._closing_lock: + self._closing.set() + return if items: if items[0]['id'] > self.id: self.id = items[0]['id'] @@ -100,14 +163,29 @@ class PollClient(threading.Thread): max_id = self.id moreitems = False for f in self.filters: - items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute() + try: + # If we get a transient error, we really really need to + # just keep trying over and over with the same query or + # we'll potentially drop events which would break the event + # stream contract. + items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute(num_retries=1000000) + except Exception as e: + # Some apparently non-retryable error happened, so log the + # error and shut down gracefully. + _logger.error("Got exception from log query: %s", e) + with self._closing_lock: + self._closing.set() + return for i in items["items"]: if i['id'] > max_id: max_id = i['id'] with self._closing_lock: if self._closing.is_set(): return - self.on_event(i) + try: + self.on_event(i) + except Exception as e: + _logger.warn("Unexpected exception from event callback.", exc_info=e) if items["items_available"] > len(items["items"]): moreitems = True self.id = max_id @@ -143,12 +221,12 @@ class PollClient(threading.Thread): # to do so raises the same exception." pass - def subscribe(self, filters): + def subscribe(self, f): self.on_event({'status': 200}) - self.filters.append(filters) + self.filters.append(f) - def unsubscribe(self, filters): - del self.filters[self.filters.index(filters)] + def unsubscribe(self, f): + del self.filters[self.filters.index(f)] def _subscribe_websocket(api, filters, on_event, last_log_id=None): @@ -190,7 +268,10 @@ def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None): return _subscribe_websocket(api, filters, on_event, last_log_id) try: - return _subscribe_websocket(api, filters, on_event, last_log_id) + if not config.flag_is_true('ARVADOS_DISABLE_WEBSOCKETS'): + return _subscribe_websocket(api, filters, on_event, last_log_id) + else: + _logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true") except Exception as e: _logger.warn("Falling back to polling after websocket error: %s" % e) p = PollClient(api, filters, on_event, poll_fallback, last_log_id)