[ARVADOS] created: 54a951255316417a42a3bd8c77aaa0b58d180440

git at public.curoverse.com git at public.curoverse.com
Wed Oct 1 12:00:34 EDT 2014


        at  54a951255316417a42a3bd8c77aaa0b58d180440 (commit)


commit 54a951255316417a42a3bd8c77aaa0b58d180440
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Oct 1 12:00:25 2014 -0400

    3769: Allow up to crunch_limit_log_event_throttle_rate bytes of log output
    per crunch_limit_log_event_throttle_period seconds, then silence the logs
    until the next crunch_limit_log_event_throttle_period begins.

diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 7571bb0..7185810 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -83,6 +83,13 @@ common:
   # Maximum number of total bytes that may be logged by a single job.
   crunch_limit_log_event_bytes_per_job: 67108864
 
+  # The sample period for throttling logs, in seconds (see below)
+  crunch_limit_log_event_throttle_period: 60
+
+  # Maximum number of bytes that a job can log within one
+  # crunch_limit_log_event_throttle_period before being silenced until the period ends
+  crunch_limit_log_event_throttle_rate: 65536
+
   # These two settings control how frequently log events are flushed
   # to the database.  If a job generates two or more events within
   # crunch_log_seconds_between_events, the log data is not flushed
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 00144d3..a5afb22 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -276,7 +276,7 @@ class Dispatcher
           $stderr.puts "dispatch: git fetch-pack failed"
           sleep 1
           next
-        end        
+        end
       end
 
       # check if the commit needs to be tagged with this job uuid
@@ -342,7 +342,10 @@ class Dispatcher
         stderr_flushed_at: 0,
         bytes_logged: 0,
         events_logged: 0,
-        log_truncated: false
+        log_truncated: false,
+        log_throttle_timestamp: 0,
+        log_throttle_bytes_so_far: 0,
+        log_throttle_bytes_skipped: 0,
       }
       i.close
       update_node_status
@@ -553,6 +556,40 @@ class Dispatcher
     return if running_job[:log_truncated]
     return if running_job[:stderr_buf_to_flush] == ''
     begin
+      now = Time.now
+      throttle_period = Rails.configuration.crunch_limit_log_event_throttle_period
+
+      if (now - running_job[:log_throttle_timestamp]) > throttle_period
+        # It has been more than throttle_period seconds since the last checkpoint so reset the
+        # throttle
+        if running_job[:log_throttle_bytes_skipped] > 0
+          running_job[:stderr_buf_to_flush] << "Skipped #{running_job[:log_throttle_bytes_skipped]} bytes of log"
+        end
+
+        running_job[:log_throttle_timestamp] = now
+        running_job[:log_throttle_bytes_so_far] = 0
+        running_job[:log_throttle_bytes_skipped] = 0
+      end
+
+      if running_job[:log_throttle_bytes_skipped] > 0
+        # We've skipped some log in this time period already, so continue to
+        # skip the log
+        running_job[:log_throttle_bytes_skipped] += running_job[:stderr_buf_to_flush].size
+        return
+      end
+
+      # Record bytes logged so far in this period
+      running_job[:log_throttle_bytes_so_far] += running_job[:stderr_buf_to_flush].size
+
+      if running_job[:log_throttle_bytes_so_far] > Rails.configuration.crunch_limit_log_event_throttle_rate
+        # We've exceeded the throttle rate, so start skipping
+        running_job[:log_throttle_bytes_skipped] += running_job[:stderr_buf_to_flush].size
+
+        # Replace the message with a message about skipping the log and log that instead
+        remaining_time = throttle_period - (now - running_job[:log_throttle_timestamp])
+        running_job[:stderr_buf_to_flush] = "Exceeded log rate of #{Rails.configuration.crunch_limit_log_event_throttle_rate} per #{throttle_period} seconds, logging will be silenced for the next #{remaining_time} seconds\n"
+      end
+
       # Truncate logs if they exceed crunch_limit_log_event_bytes_per_job
       # or crunch_limit_log_events_per_job.
       if (too_many_bytes_logged_for_job(running_job))

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list