X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2d112118532f0e059bc7e72d85ee63083a98156f..9b98829c565a2aa487d21ecd7f9429c23d0cec20:/sdk/python/arvados/events.py diff --git a/sdk/python/arvados/events.py b/sdk/python/arvados/events.py index 79960c43bf..a1b4638439 100644 --- a/sdk/python/arvados/events.py +++ b/sdk/python/arvados/events.py @@ -1,9 +1,11 @@ import arvados import config import errors +from retry import RetryLoop import logging import json +import thread import threading import time import os @@ -81,14 +83,18 @@ class EventClient(object): self.filters = [[]] self.on_event_cb = on_event_cb self.last_log_id = last_log_id - self.is_closed = False - self.ec = _EventClient(url, self.filters, self.on_event, last_log_id, self.on_closed) + self.is_closed = threading.Event() + self._setup_event_client() - def connect(self): - self.ec.connect() - - def close_connection(self): - self.ec.close_connection() + def _setup_event_client(self): + self.ec = _EventClient(self.url, self.filters, self.on_event, + self.last_log_id, self.on_closed) + self.ec.daemon = True + try: + self.ec.connect() + except Exception: + self.ec.close_connection() + raise def subscribe(self, f, last_log_id=None): self.filters.append(f) @@ -99,25 +105,38 @@ class EventClient(object): self.ec.unsubscribe(f) def close(self, code=1000, reason='', timeout=0): - self.is_closed = True + self.is_closed.set() self.ec.close(code, reason, timeout) def on_event(self, m): if m.get('id') != None: self.last_log_id = m.get('id') - self.on_event_cb(m) + try: + self.on_event_cb(m) + except Exception as e: + _logger.exception("Unexpected exception from event callback.") + thread.interrupt_main() def on_closed(self): - if self.is_closed == False: + if not self.is_closed.is_set(): _logger.warn("Unexpected close. Reconnecting.") - self.ec = _EventClient(self.url, self.filters, self.on_event, self.last_log_id, self.on_closed) - while True: - try: - self.ec.connect() - break - except Exception as e: - _logger.warn("Error '%s' during websocket reconnect. Will retry after 5s.", e, exc_info=e) - time.sleep(5) + for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15): + try: + self._setup_event_client() + _logger.warn("Reconnect successful.") + break + except Exception as e: + _logger.warn("Error '%s' during websocket reconnect.", e) + if tries_left == 0: + _logger.exception("EventClient thread could not contact websocket server.") + self.is_closed.set() + thread.interrupt_main() + return + + def run_forever(self): + # Have to poll here to let KeyboardInterrupt get raised. + while not self.is_closed.wait(1): + pass class PollClient(threading.Thread): @@ -141,7 +160,21 @@ class PollClient(threading.Thread): self.id = self.last_log_id else: for f in self.filters: - items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items'] + for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time): + try: + items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items'] + break + except errors.ApiError as error: + pass + else: + tries_left = 0 + break + if tries_left == 0: + _logger.exception("PollClient thread could not contact API server.") + with self._closing_lock: + self._closing.set() + thread.interrupt_main() + return if items: if items[0]['id'] > self.id: self.id = items[0]['id'] @@ -152,14 +185,32 @@ class PollClient(threading.Thread): max_id = self.id moreitems = False for f in self.filters: - items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute() + for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time): + try: + items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute() + break + except errors.ApiError as error: + pass + else: + tries_left = 0 + break + if tries_left == 0: + _logger.exception("PollClient thread could not contact API server.") + with self._closing_lock: + self._closing.set() + thread.interrupt_main() + return for i in items["items"]: if i['id'] > max_id: max_id = i['id'] with self._closing_lock: if self._closing.is_set(): return - self.on_event(i) + try: + self.on_event(i) + except Exception as e: + _logger.exception("Unexpected exception from event callback.") + thread.interrupt_main() if items["items_available"] > len(items["items"]): moreitems = True self.id = max_id @@ -208,20 +259,14 @@ def _subscribe_websocket(api, filters, on_event, last_log_id=None): if not endpoint: raise errors.FeatureNotEnabledError( "Server does not advertise a websocket endpoint") + uri_with_token = "{}?api_token={}".format(endpoint, api.api_token) try: - uri_with_token = "{}?api_token={}".format(endpoint, api.api_token) client = EventClient(uri_with_token, filters, on_event, last_log_id) - ok = False - try: - client.connect() - ok = True - return client - finally: - if not ok: - client.close_connection() - except: + except Exception: _logger.warn("Failed to connect to websockets on %s" % endpoint) raise + else: + return client def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):