require 'faye/websocket'
require 'record_filters'
require 'load_param'
require 'set'
require 'thread'
# Patch in user, last_log_id and filters fields into the Faye::Websocket class.
# NOTE(review): the `class WebSocket` opener was missing in the mangled patch
# text; it is restored here, as implied by the two closing `end`s and the
# comment above naming the Faye::Websocket class.
module Faye
  class WebSocket
    # Authenticated user associated with this connection.
    attr_accessor :user
    # Last log id that was a candidate to be sent out on this connection.
    attr_accessor :last_log_id
    # Subscription filters active on this connection.
    attr_accessor :filters
    # Set of log ids already delivered, used to avoid sending duplicates.
    attr_accessor :sent_ids
    # Per-connection event queue, drained by the connection's worker thread.
    attr_accessor :queue
    # Mutex serializing writes of complete frames to the underlying socket.
    attr_accessor :frame_mtx
  end
end
+
# Serialize frame writes on the underlying websocket driver so concurrent
# threads cannot interleave partial frames on the wire.
module WebSocket
  class Driver

    class Server
      # Keep a handle on the real frame writer so the serialized wrapper
      # below can delegate to it.
      alias_method :_write, :write

      def write(data)
        # Most of the sending activity will be from the thread set up in
        # on_connect. However, there is also some automatic activity in the
        # form of ping/pong messages, so ensure that the write method used to
        # send one complete message to the underlying socket can only be
        # called by one thread at a time.
        #
        # NOTE(review): assumes this driver object exposes a frame_mtx
        # accessor holding a Mutex — confirm against the websocket patch
        # elsewhere in this file.
        self.frame_mtx.synchronize do
          _write(data)
        end
      end
    end
  end
end
@channel = EventMachine::Channel.new
@mtx = Mutex.new
@bgthread = false
+ @connection_count = 0
+ end
+
+ def send_message(ws, obj)
+ ws.send(Oj.dump(obj, mode: :compat))
end
  # Push out any pending events to the connection +ws+
  #
  # It queries the database for log rows that are either
  # a) greater than ws.last_log_id, which is the last log id which was a candidate to be sent out
  # b) if ws.last_log_id is nil, then it queries the row notify_id
  #
  # Regular Arvados permissions are applied using readable_by() and filters using record_filters().
def push_events ws, notify_id
begin
- if !notify_id.nil? and !ws.last_log_id.nil? and notify_id <= ws.last_log_id
- # This notify is for a row we've handled already.
- return
- end
-
# Must have at least one filter set up to receive events
if ws.filters.length > 0
- # Start with log rows readable by user, sorted in ascending order
- logs = Log.readable_by(ws.user).order("id asc")
+ # Start with log rows readable by user
+ logs = Log.readable_by(ws.user)
cond_id = nil
cond_out = []
param_out = []
- if !ws.last_log_id.nil?
- # Client is only interested in log rows that are newer than the
- # last log row seen by the client.
+ if not ws.last_log_id.nil?
+ # We are catching up from some starting point.
cond_id = "logs.id > ?"
param_out << ws.last_log_id
- elsif !notify_id.nil?
- # No last log id, so look at rows starting with notify id
- cond_id = "logs.id >= ?"
+ elsif not notify_id.nil?
+ # Get next row being notified.
+ cond_id = "logs.id = ?"
param_out << notify_id
else
# No log id to start from, nothing to do, return
logs = logs.where(cond_id, *param_out)
end
- # Execute query and actually send the matching log rows
- count = 0
- limit = 20
-
- logs.limit(limit).each do |l|
- ws.send(l.as_api_response.to_json)
- ws.last_log_id = l.id
- count += 1
- end
-
- if count == limit
- # Number of rows returned was capped by limit(), we need to schedule
- # another query to get more logs (will start from last_log_id
- # reported by current query)
- EventMachine::schedule do
- push_events ws, nil
+ # Execute query and actually send the matching log rows. Load
+ # the full log records only when we're ready to send them,
+ # though: otherwise, (1) postgres has to build the whole
+ # result set and return it to us before we can send the first
+ # event, and (2) we store lots of records in memory while
+ # waiting to spool them out to the client. Both of these are
+ # troublesome when log records are large (e.g., a collection
+ # update contains both old and new manifest_text).
+ #
+ # Note: find_each implies order('id asc'), which is what we
+ # want.
+ logs.select('logs.id').find_each do |l|
+ if not ws.sent_ids.include?(l.id)
+ # only send if not a duplicate
+ send_message(ws, Log.find(l.id).as_api_response)
+ end
+ if not ws.last_log_id.nil?
+ # record ids only when sending "catchup" messages, not notifies
+ ws.sent_ids << l.id
end
- elsif !notify_id.nil? and (ws.last_log_id.nil? or notify_id > ws.last_log_id)
- # Number of rows returned was less than cap, but the notify id is
- # higher than the last id visible to the client, so update last_log_id
- ws.last_log_id = notify_id
end
- elsif !notify_id.nil?
- # No filters set up, so just record the sequence number
- ws.last_log_id = notify_id
+ ws.last_log_id = nil
end
+ rescue ArgumentError => e
+ # There was some kind of user error.
+ Rails.logger.warn "Error publishing event: #{$!}"
+ send_message(ws, {status: 500, message: $!})
+ ws.close
rescue => e
Rails.logger.warn "Error publishing event: #{$!}"
Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- ws.send ({status: 500, message: 'error'}.to_json)
+ send_message(ws, {status: 500, message: $!})
ws.close
# These exceptions typically indicate serious server trouble:
# out of memory issues, database connection problems, etc. Go ahead and
begin
begin
# Parse event data as JSON
- p = (Oj.load event.data).symbolize_keys
+ p = (Oj.strict_load event.data).symbolize_keys
filter = Filter.new(p)
rescue Oj::Error => e
- ws.send ({status: 400, message: "malformed request"}.to_json)
+ send_message(ws, {status: 400, message: "malformed request"})
return
end
# Set or reset the last_log_id. The event bus only reports events
# for rows that come after last_log_id.
ws.last_log_id = p[:last_log_id].to_i
+ # Reset sent_ids for consistency
+ # (always re-deliver all matching messages following last_log_id)
+ ws.sent_ids = Set.new
end
- if ws.filters.length < MAX_FILTERS
+ if ws.filters.length < Rails.configuration.websocket_max_filters
# Add a filter. This gets the :filters field which is the same
# format as used for regular index queries.
ws.filters << filter
- ws.send ({status: 200, message: 'subscribe ok', filter: p}.to_json)
+ send_message(ws, {status: 200, message: 'subscribe ok', filter: p})
# Send any pending events
push_events ws, nil
else
- ws.send ({status: 403, message: "maximum of #{MAX_FILTERS} filters allowed per connection"}.to_json)
+ send_message(ws, {status: 403, message: "maximum of #{Rails.configuration.websocket_max_filters} filters allowed per connection"})
end
elsif p[:method] == 'unsubscribe'
len = ws.filters.length
ws.filters.select! { |f| not ((f.filters == p[:filters]) or (f.filters.empty? and p[:filters].nil?)) }
if ws.filters.length < len
- ws.send ({status: 200, message: 'unsubscribe ok'}.to_json)
+ send_message(ws, {status: 200, message: 'unsubscribe ok'})
else
- ws.send ({status: 404, message: 'filter not found'}.to_json)
+ send_message(ws, {status: 404, message: 'filter not found'})
end
else
- ws.send ({status: 400, message: "missing or unrecognized method"}.to_json)
+ send_message(ws, {status: 400, message: "missing or unrecognized method"})
end
rescue => e
Rails.logger.warn "Error handling message: #{$!}"
Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- ws.send ({status: 500, message: 'error'}.to_json)
+ send_message(ws, {status: 500, message: 'error'})
ws.close
end
end
- # Constant maximum number of filters, to avoid silly huge database queries.
- MAX_FILTERS = 16
+ def overloaded?
+ @mtx.synchronize do
+ @connection_count >= Rails.configuration.websocket_max_connections
+ end
+ end
# Called by RackSocket when a new websocket connection has been established.
def on_connect ws
-
# Disconnect if no valid API token.
# current_user is included from CurrentApiClient
if not current_user
- ws.send ({status: 401, message: "Valid API token required"}.to_json)
- ws.close
+ send_message(ws, {status: 401, message: "Valid API token required"})
+ # Wait for the handshake to complete before closing the
+ # socket. Otherwise, nginx responds with HTTP 502 Bad gateway,
+ # and the client never sees our real error message.
+ ws.on :open do |event|
+ ws.close
+ end
return
end
ws.user = current_user
ws.filters = []
ws.last_log_id = nil
+ ws.sent_ids = Set.new
+ ws.queue = Queue.new
+ ws.frame_mtx = Mutex.new
+
+ @mtx.synchronize do
+ @connection_count += 1
+ end
- # Subscribe to internal postgres notifications through @channel. This will
- # call push_events when a notification comes through.
+ # Subscribe to internal postgres notifications through @channel and
+ # forward them to the thread associated with the connection.
sub = @channel.subscribe do |msg|
- push_events ws, msg
+ if ws.queue.length > Rails.configuration.websocket_max_notify_backlog
+ send_message(ws, {status: 500, message: 'Notify backlog too long'})
+ ws.close
+ @channel.unsubscribe sub
+ ws.queue.clear
+ else
+ ws.queue << [:notify, msg]
+ end
end
# Set up callback for inbound message dispatch.
ws.on :message do |event|
- handle_message ws, event
+ ws.queue << [:message, event]
end
# Set up socket close callback
ws.on :close do |event|
@channel.unsubscribe sub
- ws = nil
+ ws.queue.clear
+ ws.queue << [:close, nil]
+ end
+
+ # Spin off a new thread to handle sending events to the client. We need a
+ # separate thread per connection so that a slow client doesn't interfere
+ # with other clients.
+ #
+ # We don't want the loop in the request thread because on a TERM signal,
+ # Puma waits for outstanding requests to complete, and long-lived websocket
+ # connections may not complete in a timely manner.
+ Thread.new do
+ # Loop and react to socket events.
+ begin
+ loop do
+ eventType, msg = ws.queue.pop
+ if eventType == :message
+ handle_message ws, msg
+ elsif eventType == :notify
+ push_events ws, msg
+ elsif eventType == :close
+ break
+ end
+ end
+ ensure
+ @mtx.synchronize do
+ @connection_count -= 1
+ end
+ ActiveRecord::Base.connection.close
+ end
end
# Start up thread to monitor the Postgres database, if none exists already.
end
end
- # Since EventMachine is an asynchronous event based dispatcher, #on_connect
- # does not block but instead returns immediately after having set up the
- # websocket and notification channel callbacks.
end
end