[ARVADOS] created: 37e995fb7423b2b353c599e2a1b00bda7c29ee6f

Git user git at public.curoverse.com
Tue Mar 21 11:39:38 EDT 2017


        at  37e995fb7423b2b353c599e2a1b00bda7c29ee6f (commit)


commit 37e995fb7423b2b353c599e2a1b00bda7c29ee6f
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Mar 21 11:39:20 2017 -0400

    11065: Delete old audit logs.

diff --git a/services/api/app/models/log.rb b/services/api/app/models/log.rb
index 3207d1f..8c57e10 100644
--- a/services/api/app/models/log.rb
+++ b/services/api/app/models/log.rb
@@ -1,3 +1,5 @@
+require 'audit_logs'
+
 class Log < ArvadosModel
   include HasUuid
   include KindAndEtag
@@ -5,6 +7,7 @@ class Log < ArvadosModel
   serialize :properties, Hash
   before_validation :set_default_event_at
   after_save :send_notify
+  after_save { AuditLogs.tidy_in_background }
 
   api_accessible :user, extend: :common do |t|
     t.add :id
@@ -101,5 +104,4 @@ class Log < ArvadosModel
   def send_notify
     connection.execute "NOTIFY logs, '#{self.id}'"
   end
-
 end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index cae6bbd..fbecc96 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -232,6 +232,22 @@ common:
   # crunchstat logs from the logs table.
   clean_container_log_rows_after: <%= 30.days %>
 
+  # Time to keep audit logs (a row in the log table added each time an
+  # Arvados object is created, modified, or deleted) in the PostgreSQL
+  # database. Currently, websocket event notifications rely on audit
+  # logs, so this should not be set lower than 600 (5 minutes).
+  max_audit_log_age: 1209600
+
+  # Maximum number of log rows to delete in a single SQL transaction.
+  #
+  # If max_audit_log_delete_batch is 0, log entries will never be
+  # deleted by Arvados. Cleanup can be done by an external process
+  # without affecting any Arvados system processes, as long as very
+  # recent (<5 minutes old) logs are not deleted.
+  #
+  # 100000 is a reasonable batch size for most sites.
+  max_audit_log_delete_batch: 0
+
   # The maximum number of compute nodes that can be in use simultaneously
   # If this limit is reduced, any existing nodes with slot number >= new limit
   # will not be counted against the new limit. In other words, the new limit
@@ -464,3 +480,4 @@ test:
   websocket_address: <% if ENV['ARVADOS_TEST_EXPERIMENTAL_WS'] %>"wss://0.0.0.0:<%= ENV['ARVADOS_TEST_WSS_PORT'] %>/websocket"<% else %>false<% end %>
   trash_sweep_interval: -1
   docker_image_formats: ["v1"]
+  max_audit_log_delete_batch: 0
diff --git a/services/api/lib/audit_logs.rb b/services/api/lib/audit_logs.rb
new file mode 100644
index 0000000..8a5a630
--- /dev/null
+++ b/services/api/lib/audit_logs.rb
@@ -0,0 +1,65 @@
+require 'current_api_client'
+require 'db_current_time'
+
+module AuditLogs
+  extend CurrentApiClient
+  extend DbCurrentTime
+
+  def self.delete_old(max_age:, max_batch:)
+    act_as_system_user do
+      if !File.owned?(Rails.root.join('tmp'))
+        Rails.logger.warn("AuditLogs: not owner of #{Rails.root}/tmp, skipping")
+        return
+      end
+      lockfile = Rails.root.join('tmp', 'audit_logs.lock')
+      File.open(lockfile, File::RDWR|File::CREAT, 0600) do |f|
+        return unless f.flock(File::LOCK_NB|File::LOCK_EX)
+
+        sql = "select clock_timestamp() - interval '#{max_age} seconds'"
+        threshold = ActiveRecord::Base.connection.select_value(sql).to_time.utc
+        Rails.logger.info "AuditLogs: deleting logs older than #{threshold}"
+
+        did_total = 0
+        loop do
+          sql = Log.unscoped.
+                select(:id).
+                order(:created_at).
+                where('event_type in (?)', ['create', 'update', 'destroy', 'delete']).
+                where('created_at < ?', threshold).
+                limit(max_batch).
+                to_sql
+          did = Log.unscoped.where("id in (#{sql})").delete_all
+          did_total += did
+
+          Rails.logger.info "AuditLogs: deleted batch of #{did}"
+          break if did == 0
+        end
+        Rails.logger.info "AuditLogs: deleted total #{did_total}"
+      end
+    end
+  end
+
+  def self.tidy_in_background
+    max_age = Rails.configuration.max_audit_log_age
+    max_batch = Rails.configuration.max_audit_log_delete_batch
+    return if max_age <= 0 || max_batch <= 0
+
+    exp = (max_age/14).seconds
+    need = false
+    Rails.cache.fetch('AuditLogs', expires_in: exp) do
+      need = true
+    end
+    return if !need
+
+    Thread.new do
+      Thread.current.abort_on_exception = false
+      begin
+        delete_old(max_age: max_age, max_batch: max_batch)
+      rescue => e
+        Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
+      ensure
+        ActiveRecord::Base.connection.close
+      end
+    end
+  end
+end
diff --git a/services/api/test/fixtures/logs.yml b/services/api/test/fixtures/logs.yml
index d83cf96..a39aff5 100644
--- a/services/api/test/fixtures/logs.yml
+++ b/services/api/test/fixtures/logs.yml
@@ -13,6 +13,7 @@ admin_changes_repository2: # admin changes repository2, which is owned by active
   object_uuid: zzzzz-2x53u-382brsig8rp3667 # repository foo
   object_owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
   event_at: <%= 2.minute.ago.to_s(:db) %>
+  event_type: update
 
 admin_changes_specimen: # admin changes specimen owned_by_spectator
   id: 3
@@ -21,6 +22,7 @@ admin_changes_specimen: # admin changes specimen owned_by_spectator
   object_uuid: zzzzz-2x53u-3b0xxwzlbzxq5yr # specimen owned_by_spectator
   object_owner_uuid: zzzzz-tpzed-l1s2piq4t4mps8r # spectator user
   event_at: <%= 3.minute.ago.to_s(:db) %>
+  event_type: update
 
 system_adds_foo_file: # foo collection added, readable by active through link
   id: 4
@@ -29,6 +31,7 @@ system_adds_foo_file: # foo collection added, readable by active through link
   object_uuid: zzzzz-4zz18-znfnqtbbv4spc3w # foo file
   object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
   event_at: <%= 4.minute.ago.to_s(:db) %>
+  event_type: create
 
 system_adds_baz: # baz collection added, readable by active and spectator through group 'all users' group membership
   id: 5
@@ -37,6 +40,7 @@ system_adds_baz: # baz collection added, readable by active and spectator throug
   object_uuid: zzzzz-4zz18-y9vne9npefyxh8g # baz file
   object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
   event_at: <%= 5.minute.ago.to_s(:db) %>
+  event_type: create
 
 log_owned_by_active:
   id: 6
diff --git a/services/api/test/unit/log_test.rb b/services/api/test/unit/log_test.rb
index a1a19ca..370bec1 100644
--- a/services/api/test/unit/log_test.rb
+++ b/services/api/test/unit/log_test.rb
@@ -1,4 +1,5 @@
 require 'test_helper'
+require 'audit_logs'
 
 class LogTest < ActiveSupport::TestCase
   include CurrentApiClient
@@ -311,4 +312,73 @@ class LogTest < ActiveSupport::TestCase
       end
     end
   end
+
+  def assert_no_logs_deleted
+    logs_before = Log.unscoped.all.count
+    yield
+    assert_equal logs_before, Log.unscoped.all.count
+  end
+
+  def remaining_audit_logs
+    Log.unscoped.where('event_type in (?)', %w(create update destroy delete))
+  end
+
+  test 'retain old audit logs with default settings' do
+    assert_no_logs_deleted do
+      AuditLogs.delete_old(
+        max_age: Rails.configuration.max_audit_log_age,
+        max_batch: Rails.configuration.max_audit_log_delete_batch)
+    end
+  end
+
+  test 'retain old audit logs with batch=0' do
+    assert_no_logs_deleted do
+      AuditLogs.delete_old(max_age: 1, max_batch: 0)
+    end
+  end
+
+  test 'delete old audit logs with production settings' do
+    initial_log_count = Log.unscoped.all.count
+    AuditLogs.delete_old(max_age: 180, max_batch: 100000)
+    assert_operator remaining_audit_logs.count, :<, initial_log_count
+  end
+
+  test 'delete old audit logs in multiple batches' do
+    AuditLogs.delete_old(max_age: 0, max_batch: 2)
+    assert_equal [], remaining_audit_logs.collect(&:uuid)
+  end
+
+  test 'delete old audit logs in thread' do
+    begin
+      Rails.configuration.max_audit_log_age = 20
+      Rails.configuration.max_audit_log_delete_batch = 100000
+      Rails.cache.delete 'AuditLogs'
+      initial_log_count = Log.unscoped.all.count + 1
+      act_as_system_user do
+        Log.create!()
+        initial_log_count += 1
+      end
+      deadline = Time.now + 10
+      while remaining_audit_logs.count == initial_log_count
+        if Time.now > deadline
+          raise "timed out"
+        end
+        sleep 0.1
+      end
+      assert_operator remaining_audit_logs.count, :<, initial_log_count
+    ensure
+      # The test framework rolls back our transactions, but that
+      # doesn't undo the deletes we did from separate threads.
+      ActiveRecord::Base.connection.exec_query 'ROLLBACK'
+      Thread.new do
+        begin
+          dc = DatabaseController.new
+          dc.define_singleton_method :render do |*args| end
+          dc.reset
+        ensure
+          ActiveRecord::Base.connection.close
+        end
+      end.join
+    end
+  end
 end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list