8931: Use RetryLoop around websocket reconnect. Create a new _EventClient
author Peter Amstutz <peter.amstutz@curoverse.com>
Tue, 26 Apr 2016 13:51:53 +0000 (09:51 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Tue, 26 Apr 2016 13:51:53 +0000 (09:51 -0400)
object on each loop iteration.  Handle unexpected exceptions in PollClient
retry loop.

sdk/python/arvados/events.py
sdk/python/arvados/retry.py

index 54f3019f36b7b3e5544e3780afb730d61c44d8fd..4985aaf1b7512596125f06c39ca5958f9a9bf6ad 100644 (file)
@@ -116,14 +116,18 @@ class EventClient(object):
     def on_closed(self):
         if self.is_closed == False:
             _logger.warn("Unexpected close. Reconnecting.")
-            self.ec = _EventClient(self.url, self.filters, self.on_event, self.last_log_id, self.on_closed)
-            while True:
+          for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15):
               try:
+                  self.ec = _EventClient(self.url, self.filters, self.on_event, self.last_log_id, self.on_closed)
                   self.ec.connect()
                   break
               except Exception as e:
-                  _logger.warn("Error '%s' during websocket reconnect. Will retry after 5s.", e, exc_info=e)
-                  time.sleep(5)
+                  _logger.warn("Error '%s' during websocket reconnect.", e)
+           if tries_left == 0:
+                _logger.exception("EventClient thread could not contact websocket server.")
+                self.is_closed = True
+                thread.interrupt_main()
+                return
 
 
 class PollClient(threading.Thread):
@@ -178,6 +182,9 @@ class PollClient(threading.Thread):
                         break
                     except errors.ApiError as error:
                         pass
+                    else:
+                        tries_left = 0
+                        break
                 if tries_left == 0:
                     _logger.exception("PollClient thread could not contact API server.")
                     with self._closing_lock:
index dccd9c875a69ee161923e6fa6818e2d751f92352..5ba4f4ea41016a6225ebb3fca194265e56b56a0b 100644 (file)
@@ -51,7 +51,7 @@ class RetryLoop(object):
         * save_results: Specify a number to save the last N results
           that the loop recorded.  These records are available through
           the results attribute, oldest first.  Default 1.
-        * max_wait: Maximum time to wait between retries.
+        * max_wait: Maximum number of seconds to wait between retries.
         """
         self.tries_left = num_retries + 1
         self.check_result = success_check