[ARVADOS] created: 6d910ff1327859e0eae9eed474ee360fb9c5894b

Git user git at public.curoverse.com
Fri Jun 17 16:17:32 EDT 2016


        at  6d910ff1327859e0eae9eed474ee360fb9c5894b (commit)


commit 6d910ff1327859e0eae9eed474ee360fb9c5894b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Jun 17 16:12:15 2016 -0400

    9427: Websockets now uses a thread per connection for database query and
    sending outgoing messages.  This minimizes interference by slow clients or
    clients that are "replaying the world" with a last_log_id far in the past.

diff --git a/services/api/lib/eventbus.rb b/services/api/lib/eventbus.rb
index b35da1b..c72ad90 100644
--- a/services/api/lib/eventbus.rb
+++ b/services/api/lib/eventbus.rb
@@ -8,6 +8,7 @@ require 'faye/websocket'
 require 'record_filters'
 require 'load_param'
 require 'set'
+require 'thread'
 
 # Patch in user, last_log_id and filters fields into the Faye::Websocket class.
 module Faye
@@ -16,7 +17,28 @@ module Faye
     attr_accessor :last_log_id
     attr_accessor :filters
     attr_accessor :sent_ids
-    attr_accessor :notify_queue
+    attr_accessor :queue
+    attr_accessor :frame_mtx
+  end
+end
+
+module WebSocket
+  class Driver
+
+    class Hybi < Driver
+      alias_method :_frame, :frame
+
+      def frame(data, type = nil, code = nil)
+        # Most of the sending activity will be from the thread set up in
+        # on_connect.  However, there is also some automatic activity in the
+        # form of ping/pong messages, so ensure that the frame method (which
+        # actually packs and sends one complete message to the underlying
+        # socket) can only be called by one thread at a time.
+        @socket.frame_mtx.synchronize do
+          _frame(data, type, code)
+        end
+      end
+    end
   end
 end
 
@@ -63,11 +85,9 @@ class EventBus
   #
   # It queries the database for log rows that are either
   #  a) greater than ws.last_log_id, which is the last log id which was a candidate to be sent out
-  #  b) if ws.last_log_id is nil, then it queries rows starting with notify_id
+  #  b) if ws.last_log_id is nil, then it queries the row notify_id
   #
-  # Regular Arvados permissions are applied using readable_by() and filters using record_filters()
-  # To avoid clogging up the database, queries are limited to batches of 100.  It will schedule a new
-  # push_events call if there are more log rows to send.
+  # Regular Arvados permissions are applied using readable_by() and filters using record_filters().
   def push_events ws, notify_id
     begin
       # Must have at least one filter set up to receive events
@@ -79,18 +99,14 @@ class EventBus
         cond_out = []
         param_out = []
 
-        if not notify_id.nil?
-          ws.notify_queue.unshift notify_id
-        end
-
         if not ws.last_log_id.nil?
           # We are catching up from some starting point.
           cond_id = "logs.id > ?"
           param_out << ws.last_log_id
-        elsif ws.notify_queue.length > 0
+        elsif not notify_id.nil?
           # Get next row being notified.
           cond_id = "logs.id = ?"
-          param_out << ws.notify_queue.pop
+          param_out << notify_id
         else
           # No log id to start from, nothing to do, return
           return
@@ -116,11 +132,7 @@ class EventBus
         end
 
         # Execute query and actually send the matching log rows
-        count = 0
-        limit = 10
-
-        lastid = nil
-        logs.limit(limit).each do |l|
+        logs.each do |l|
           if not ws.sent_ids.include?(l.id)
             # only send if not a duplicate
             ws.send(l.as_api_response.to_json)
@@ -129,28 +141,8 @@ class EventBus
             # record ids only when sending "catchup" messages, not notifies
             ws.sent_ids << l.id
           end
-          lastid = l.id
-          count += 1
-        end
-
-        if count == limit
-          # Number of rows returned was capped by limit(), we need to schedule
-          # another query to get more logs (will start from last_log_id
-          # reported by current query)
-          ws.last_log_id = lastid
-          EventMachine::next_tick do
-            push_events ws, nil
-          end
-        elsif !ws.last_log_id.nil?
-          # Done catching up
-          ws.last_log_id = nil
-        end
-
-        if ws.notify_queue.length > 0
-          EventMachine::next_tick do
-            push_events ws, nil
-          end
         end
+        ws.last_log_id = nil
       end
     rescue ArgumentError => e
       # There was some kind of user error.
@@ -246,23 +238,55 @@ class EventBus
     ws.filters = []
     ws.last_log_id = nil
     ws.sent_ids = Set.new
-    ws.notify_queue = Array.new
+    ws.queue = Queue.new
+    ws.frame_mtx = Mutex.new
 
-    # Subscribe to internal postgres notifications through @channel.  This will
-    # call push_events when a notification comes through.
+    # Subscribe to internal postgres notifications through @channel and
+    # forward them to the thread associated with the connection.
     sub = @channel.subscribe do |msg|
-      push_events ws, msg
+      if msg != :term
+        ws.queue << [:notify, msg]
+      else
+        ws.close
+      end
     end
 
     # Set up callback for inbound message dispatch.
     ws.on :message do |event|
-      handle_message ws, event
+      ws.queue << [:message, event]
     end
 
     # Set up socket close callback
     ws.on :close do |event|
       @channel.unsubscribe sub
-      ws = nil
+      ws.queue.clear
+      ws.queue << [:close, nil]
+    end
+
+    # Spin off a new thread to handle sending events to the client.  We need a
+    # separate thread per connection so that a slow client doesn't interfere
+    # with other clients.
+    #
+    # We don't want the loop in the request thread because on a TERM signal,
+    # Puma waits for outstanding requests to complete, and long-lived websocket
+    # connections may not complete in a timely manner.
+    Thread.new do
+      # Loop and react to socket events.
+      loop do
+        eventType, msg = ws.queue.pop
+        if ws.queue.length > 1000
+          ws.send ({status: 500, message: 'Notify backlog too long'}.to_json)
+          ws.close
+        else
+          if eventType == :message
+            handle_message ws, msg
+          elsif eventType == :notify
+            push_events ws, msg
+          elsif eventType == :close
+            break
+          end
+        end
+      end
     end
 
     # Start up thread to monitor the Postgres database, if none exists already.
@@ -298,8 +322,5 @@ class EventBus
       end
     end
 
-    # Since EventMachine is an asynchronous event based dispatcher, #on_connect
-    # does not block but instead returns immediately after having set up the
-    # websocket and notification channel callbacks.
   end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list