18842: Clean up keep cache set logic a little more
[arvados.git] / sdk / python / arvados / events.py
index 81a9b36182a8545adbdcd3fd6afec7f0fba53602..e53e4980a86f01a595649331d020c6b87e823e6a 100644 (file)
@@ -1,11 +1,20 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import object
 import arvados
-import config
-import errors
-from retry import RetryLoop
+from . import config
+from . import errors
+from .retry import RetryLoop
 
 import logging
 import json
-import thread
+import _thread
 import threading
 import time
 import os
@@ -115,21 +124,22 @@ class EventClient(object):
             self.on_event_cb(m)
         except Exception as e:
             _logger.exception("Unexpected exception from event callback.")
-            thread.interrupt_main()
+            _thread.interrupt_main()
 
     def on_closed(self):
         if not self.is_closed.is_set():
-            _logger.warn("Unexpected close. Reconnecting.")
+            _logger.warning("Unexpected close. Reconnecting.")
             for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15):
                 try:
                     self._setup_event_client()
+                    _logger.warning("Reconnect successful.")
                     break
                 except Exception as e:
-                    _logger.warn("Error '%s' during websocket reconnect.", e)
+                    _logger.warning("Error '%s' during websocket reconnect.", e)
             if tries_left == 0:
                 _logger.exception("EventClient thread could not contact websocket server.")
                 self.is_closed.set()
-                thread.interrupt_main()
+                _thread.interrupt_main()
                 return
 
     def run_forever(self):
@@ -153,40 +163,77 @@ class PollClient(threading.Thread):
         self._closing = threading.Event()
         self._closing_lock = threading.RLock()
 
-    def run(self):
-        self.id = 0
         if self.last_log_id != None:
-            self.id = self.last_log_id
+            # Caller supplied the last-seen event ID from a previous
+            # connection.
+            self._skip_old_events = [["id", ">", str(self.last_log_id)]]
         else:
-            for f in self.filters:
-                for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time):
-                    try:
-                        items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
-                        break
-                    except errors.ApiError as error:
-                        pass
-                    else:
-                        tries_left = 0
-                        break
-                if tries_left == 0:
-                    _logger.exception("PollClient thread could not contact API server.")
-                    with self._closing_lock:
-                        self._closing.set()
-                    thread.interrupt_main()
-                    return
-                if items:
-                    if items[0]['id'] > self.id:
-                        self.id = items[0]['id']
+            # We need to do a reverse-order query to find the most
+            # recent event ID (see "if not self._skip_old_events"
+            # in run()).
+            self._skip_old_events = False
 
+    def run(self):
         self.on_event({'status': 200})
 
         while not self._closing.is_set():
-            max_id = self.id
             moreitems = False
             for f in self.filters:
                 for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time):
                     try:
-                        items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()
+                        if not self._skip_old_events:
+                            # If the caller didn't provide a known
+                            # recent ID, our first request will ask
+                            # for the single most recent event from
+                            # the last 2 hours (the time restriction
+                            # avoids doing an expensive database
+                            # query, and leaves a big enough margin to
+                            # account for clock skew). If we do find a
+                            # recent event, we remember its ID but
+                            # then discard it (we are supposed to be
+                            # returning new/current events, not old
+                            # ones).
+                            #
+                            # Subsequent requests will get multiple
+                            # events in chronological order, and
+                            # filter on that same cutoff time, or
+                            # (once we see our first matching event)
+                            # the ID of the last-seen event.
+                            #
+                            # Note: self._skip_old_events must not be
+                            # set until the threshold is decided.
+                            # Otherwise, tests will be unreliable.
+                            filter_by_time = [[
+                                "created_at", ">=",
+                                time.strftime(
+                                    "%Y-%m-%dT%H:%M:%SZ",
+                                    time.gmtime(time.time()-7200))]]
+                            items = self.api.logs().list(
+                                order="id desc",
+                                limit=1,
+                                filters=f+filter_by_time).execute()
+                            if items["items"]:
+                                self._skip_old_events = [
+                                    ["id", ">", str(items["items"][0]["id"])]]
+                                items = {
+                                    "items": [],
+                                    "items_available": 0,
+                                }
+                            else:
+                                # No recent events. We can keep using
+                                # the same timestamp threshold until
+                                # we receive our first new event.
+                                self._skip_old_events = filter_by_time
+                        else:
+                            # In this case, either we know the most
+                            # recent matching ID, or we know there
+                            # were no matching events in the 2-hour
+                            # window before subscribing. Either way we
+                            # can safely ask for events in ascending
+                            # order.
+                            items = self.api.logs().list(
+                                order="id asc",
+                                filters=f+self._skip_old_events).execute()
                         break
                     except errors.ApiError as error:
                         pass
@@ -197,11 +244,10 @@ class PollClient(threading.Thread):
                     _logger.exception("PollClient thread could not contact API server.")
                     with self._closing_lock:
                         self._closing.set()
-                    thread.interrupt_main()
+                    _thread.interrupt_main()
                     return
                 for i in items["items"]:
-                    if i['id'] > max_id:
-                        max_id = i['id']
+                    self._skip_old_events = [["id", ">", str(i["id"])]]
                     with self._closing_lock:
                         if self._closing.is_set():
                             return
@@ -209,10 +255,9 @@ class PollClient(threading.Thread):
                             self.on_event(i)
                         except Exception as e:
                             _logger.exception("Unexpected exception from event callback.")
-                            thread.interrupt_main()
+                            _thread.interrupt_main()
                 if items["items_available"] > len(items["items"]):
                     moreitems = True
-            self.id = max_id
             if not moreitems:
                 self._closing.wait(self.poll_time)
 
@@ -262,7 +307,7 @@ def _subscribe_websocket(api, filters, on_event, last_log_id=None):
     try:
         client = EventClient(uri_with_token, filters, on_event, last_log_id)
     except Exception:
-        _logger.warn("Failed to connect to websockets on %s" % endpoint)
+        _logger.warning("Failed to connect to websockets on %s" % endpoint)
         raise
     else:
         return client
@@ -291,7 +336,7 @@ def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
         else:
             _logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true")
     except Exception as e:
-        _logger.warn("Falling back to polling after websocket error: %s" % e)
+        _logger.warning("Falling back to polling after websocket error: %s" % e)
     p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
     p.start()
     return p