Merge branch 'master' into 11060-all-visible-repos
[arvados.git] / sdk / python / arvados / events.py
1 from __future__ import absolute_import
2 from future import standard_library
3 standard_library.install_aliases()
4 from builtins import str
5 from builtins import object
6 import arvados
7 from . import config
8 from . import errors
9 from .retry import RetryLoop
10
11 import logging
12 import json
13 import _thread
14 import threading
15 import time
16 import os
17 import re
18 import ssl
19 from ws4py.client.threadedclient import WebSocketClient
20
21 _logger = logging.getLogger('arvados.events')
22
23
24 class _EventClient(WebSocketClient):
25     def __init__(self, url, filters, on_event, last_log_id, on_closed):
26         ssl_options = {'ca_certs': arvados.util.ca_certs_path()}
27         if config.flag_is_true('ARVADOS_API_HOST_INSECURE'):
28             ssl_options['cert_reqs'] = ssl.CERT_NONE
29         else:
30             ssl_options['cert_reqs'] = ssl.CERT_REQUIRED
31
32         # Warning: If the host part of url resolves to both IPv6 and
33         # IPv4 addresses (common with "localhost"), only one of them
34         # will be attempted -- and it might not be the right one. See
35         # ws4py's WebSocketBaseClient.__init__.
36         super(_EventClient, self).__init__(url, ssl_options=ssl_options)
37
38         self.filters = filters
39         self.on_event = on_event
40         self.last_log_id = last_log_id
41         self._closing_lock = threading.RLock()
42         self._closing = False
43         self._closed = threading.Event()
44         self.on_closed = on_closed
45
46     def opened(self):
47         for f in self.filters:
48             self.subscribe(f, self.last_log_id)
49
50     def closed(self, code, reason=None):
51         self._closed.set()
52         self.on_closed()
53
54     def received_message(self, m):
55         with self._closing_lock:
56             if not self._closing:
57                 self.on_event(json.loads(str(m)))
58
59     def close(self, code=1000, reason='', timeout=0):
60         """Close event client and optionally wait for it to finish.
61
62         :timeout: is the number of seconds to wait for ws4py to
63         indicate that the connection has closed.
64         """
65         super(_EventClient, self).close(code, reason)
66         with self._closing_lock:
67             # make sure we don't process any more messages.
68             self._closing = True
69         # wait for ws4py to tell us the connection is closed.
70         self._closed.wait(timeout=timeout)
71
72     def subscribe(self, f, last_log_id=None):
73         m = {"method": "subscribe", "filters": f}
74         if last_log_id is not None:
75             m["last_log_id"] = last_log_id
76         self.send(json.dumps(m))
77
78     def unsubscribe(self, f):
79         self.send(json.dumps({"method": "unsubscribe", "filters": f}))
80
81
82 class EventClient(object):
83     def __init__(self, url, filters, on_event_cb, last_log_id):
84         self.url = url
85         if filters:
86             self.filters = [filters]
87         else:
88             self.filters = [[]]
89         self.on_event_cb = on_event_cb
90         self.last_log_id = last_log_id
91         self.is_closed = threading.Event()
92         self._setup_event_client()
93
94     def _setup_event_client(self):
95         self.ec = _EventClient(self.url, self.filters, self.on_event,
96                                self.last_log_id, self.on_closed)
97         self.ec.daemon = True
98         try:
99             self.ec.connect()
100         except Exception:
101             self.ec.close_connection()
102             raise
103
104     def subscribe(self, f, last_log_id=None):
105         self.filters.append(f)
106         self.ec.subscribe(f, last_log_id)
107
108     def unsubscribe(self, f):
109         del self.filters[self.filters.index(f)]
110         self.ec.unsubscribe(f)
111
112     def close(self, code=1000, reason='', timeout=0):
113         self.is_closed.set()
114         self.ec.close(code, reason, timeout)
115
116     def on_event(self, m):
117         if m.get('id') != None:
118             self.last_log_id = m.get('id')
119         try:
120             self.on_event_cb(m)
121         except Exception as e:
122             _logger.exception("Unexpected exception from event callback.")
123             _thread.interrupt_main()
124
125     def on_closed(self):
126         if not self.is_closed.is_set():
127             _logger.warning("Unexpected close. Reconnecting.")
128             for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15):
129                 try:
130                     self._setup_event_client()
131                     _logger.warning("Reconnect successful.")
132                     break
133                 except Exception as e:
134                     _logger.warning("Error '%s' during websocket reconnect.", e)
135             if tries_left == 0:
136                 _logger.exception("EventClient thread could not contact websocket server.")
137                 self.is_closed.set()
138                 _thread.interrupt_main()
139                 return
140
141     def run_forever(self):
142         # Have to poll here to let KeyboardInterrupt get raised.
143         while not self.is_closed.wait(1):
144             pass
145
146
147 class PollClient(threading.Thread):
148     def __init__(self, api, filters, on_event, poll_time, last_log_id):
149         super(PollClient, self).__init__()
150         self.api = api
151         if filters:
152             self.filters = [filters]
153         else:
154             self.filters = [[]]
155         self.on_event = on_event
156         self.poll_time = poll_time
157         self.daemon = True
158         self.last_log_id = last_log_id
159         self._closing = threading.Event()
160         self._closing_lock = threading.RLock()
161
162     def run(self):
163         if self.last_log_id != None:
164             # Caller supplied the last-seen event ID from a previous
165             # connection
166             skip_old_events = [["id", ">", str(self.last_log_id)]]
167         else:
168             # We need to do a reverse-order query to find the most
169             # recent event ID (see "if not skip_old_events" below).
170             skip_old_events = False
171
172         self.on_event({'status': 200})
173
174         while not self._closing.is_set():
175             moreitems = False
176             for f in self.filters:
177                 for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=self.poll_time):
178                     try:
179                         if not skip_old_events:
180                             # If the caller didn't provide a known
181                             # recent ID, our first request will ask
182                             # for the single most recent event from
183                             # the last 2 hours (the time restriction
184                             # avoids doing an expensive database
185                             # query, and leaves a big enough margin to
186                             # account for clock skew). If we do find a
187                             # recent event, we remember its ID but
188                             # then discard it (we are supposed to be
189                             # returning new/current events, not old
190                             # ones).
191                             #
192                             # Subsequent requests will get multiple
193                             # events in chronological order, and
194                             # filter on that same cutoff time, or
195                             # (once we see our first matching event)
196                             # the ID of the last-seen event.
197                             skip_old_events = [[
198                                 "created_at", ">=",
199                                 time.strftime(
200                                     "%Y-%m-%dT%H:%M:%SZ",
201                                     time.gmtime(time.time()-7200))]]
202                             items = self.api.logs().list(
203                                 order="id desc",
204                                 limit=1,
205                                 filters=f+skip_old_events).execute()
206                             if items["items"]:
207                                 skip_old_events = [
208                                     ["id", ">", str(items["items"][0]["id"])]]
209                                 items = {
210                                     "items": [],
211                                     "items_available": 0,
212                                 }
213                         else:
214                             # In this case, either we know the most
215                             # recent matching ID, or we know there
216                             # were no matching events in the 2-hour
217                             # window before subscribing. Either way we
218                             # can safely ask for events in ascending
219                             # order.
220                             items = self.api.logs().list(
221                                 order="id asc",
222                                 filters=f+skip_old_events).execute()
223                         break
224                     except errors.ApiError as error:
225                         pass
226                     else:
227                         tries_left = 0
228                         break
229                 if tries_left == 0:
230                     _logger.exception("PollClient thread could not contact API server.")
231                     with self._closing_lock:
232                         self._closing.set()
233                     _thread.interrupt_main()
234                     return
235                 for i in items["items"]:
236                     skip_old_events = [["id", ">", str(i["id"])]]
237                     with self._closing_lock:
238                         if self._closing.is_set():
239                             return
240                         try:
241                             self.on_event(i)
242                         except Exception as e:
243                             _logger.exception("Unexpected exception from event callback.")
244                             _thread.interrupt_main()
245                 if items["items_available"] > len(items["items"]):
246                     moreitems = True
247             if not moreitems:
248                 self._closing.wait(self.poll_time)
249
250     def run_forever(self):
251         # Have to poll here, otherwise KeyboardInterrupt will never get processed.
252         while not self._closing.is_set():
253             self._closing.wait(1)
254
255     def close(self, code=None, reason=None, timeout=0):
256         """Close poll client and optionally wait for it to finish.
257
258         If an :on_event: handler is running in a different thread,
259         first wait (indefinitely) for it to return.
260
261         After closing, wait up to :timeout: seconds for the thread to
262         finish the poll request in progress (if any).
263
264         :code: and :reason: are ignored. They are present for
265         interface compatibility with EventClient.
266         """
267
268         with self._closing_lock:
269             self._closing.set()
270         try:
271             self.join(timeout=timeout)
272         except RuntimeError:
273             # "join() raises a RuntimeError if an attempt is made to join the
274             # current thread as that would cause a deadlock. It is also an
275             # error to join() a thread before it has been started and attempts
276             # to do so raises the same exception."
277             pass
278
279     def subscribe(self, f):
280         self.on_event({'status': 200})
281         self.filters.append(f)
282
283     def unsubscribe(self, f):
284         del self.filters[self.filters.index(f)]
285
286
287 def _subscribe_websocket(api, filters, on_event, last_log_id=None):
288     endpoint = api._rootDesc.get('websocketUrl', None)
289     if not endpoint:
290         raise errors.FeatureNotEnabledError(
291             "Server does not advertise a websocket endpoint")
292     uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
293     try:
294         client = EventClient(uri_with_token, filters, on_event, last_log_id)
295     except Exception:
296         _logger.warning("Failed to connect to websockets on %s" % endpoint)
297         raise
298     else:
299         return client
300
301
302 def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
303     """
304     :api:
305       a client object retrieved from arvados.api(). The caller should not use this client object for anything else after calling subscribe().
306     :filters:
307       Initial subscription filters.
308     :on_event:
309       The callback when a message is received.
310     :poll_fallback:
311       If websockets are not available, fall back to polling every N seconds.  If poll_fallback=False, this will return None if websockets are not available.
312     :last_log_id:
313       Log rows that are newer than the log id
314     """
315
316     if not poll_fallback:
317         return _subscribe_websocket(api, filters, on_event, last_log_id)
318
319     try:
320         if not config.flag_is_true('ARVADOS_DISABLE_WEBSOCKETS'):
321             return _subscribe_websocket(api, filters, on_event, last_log_id)
322         else:
323             _logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true")
324     except Exception as e:
325         _logger.warning("Falling back to polling after websocket error: %s" % e)
326     p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
327     p.start()
328     return p