[ARVADOS] created: 37e995fb7423b2b353c599e2a1b00bda7c29ee6f
Git user
git at public.curoverse.com
Tue Mar 21 11:39:38 EDT 2017
at 37e995fb7423b2b353c599e2a1b00bda7c29ee6f (commit)
commit 37e995fb7423b2b353c599e2a1b00bda7c29ee6f
Author: Tom Clegg <tom at curoverse.com>
Date: Tue Mar 21 11:39:20 2017 -0400
11065: Delete old audit logs.
diff --git a/services/api/app/models/log.rb b/services/api/app/models/log.rb
index 3207d1f..8c57e10 100644
--- a/services/api/app/models/log.rb
+++ b/services/api/app/models/log.rb
@@ -1,3 +1,5 @@
+require 'audit_logs'
+
class Log < ArvadosModel
include HasUuid
include KindAndEtag
@@ -5,6 +7,7 @@ class Log < ArvadosModel
serialize :properties, Hash
before_validation :set_default_event_at
after_save :send_notify
+ after_save { AuditLogs.tidy_in_background }
api_accessible :user, extend: :common do |t|
t.add :id
@@ -101,5 +104,4 @@ class Log < ArvadosModel
def send_notify
connection.execute "NOTIFY logs, '#{self.id}'"
end
-
end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index cae6bbd..fbecc96 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -232,6 +232,22 @@ common:
# crunchstat logs from the logs table.
clean_container_log_rows_after: <%= 30.days %>
+ # Time to keep audit logs (a row in the log table added each time an
+ # Arvados object is created, modified, or deleted) in the PostgreSQL
+ # database. Currently, websocket event notifications rely on audit
+ # logs, so this should not be set lower than 600 (5 minutes).
+ max_audit_log_age: 1209600
+
+ # Maximum number of log rows to delete in a single SQL transaction.
+ #
+ # If max_audit_log_delete_batch is 0, log entries will never be
+ # deleted by Arvados. Cleanup can be done by an external process
+ # without affecting any Arvados system processes, as long as very
+ # recent (<5 minutes old) logs are not deleted.
+ #
+ # 100000 is a reasonable batch size for most sites.
+ max_audit_log_delete_batch: 0
+
# The maximum number of compute nodes that can be in use simultaneously
# If this limit is reduced, any existing nodes with slot number >= new limit
# will not be counted against the new limit. In other words, the new limit
@@ -464,3 +480,4 @@ test:
websocket_address: <% if ENV['ARVADOS_TEST_EXPERIMENTAL_WS'] %>"wss://0.0.0.0:<%= ENV['ARVADOS_TEST_WSS_PORT'] %>/websocket"<% else %>false<% end %>
trash_sweep_interval: -1
docker_image_formats: ["v1"]
+ max_audit_log_delete_batch: 0
diff --git a/services/api/lib/audit_logs.rb b/services/api/lib/audit_logs.rb
new file mode 100644
index 0000000..8a5a630
--- /dev/null
+++ b/services/api/lib/audit_logs.rb
@@ -0,0 +1,65 @@
+require 'current_api_client'
+require 'db_current_time'
+
+module AuditLogs
+ extend CurrentApiClient
+ extend DbCurrentTime
+
+ def self.delete_old(max_age:, max_batch:)
+ act_as_system_user do
+ if !File.owned?(Rails.root.join('tmp'))
+ Rails.logger.warn("AuditLogs: not owner of #{Rails.root}/tmp, skipping")
+ return
+ end
+ lockfile = Rails.root.join('tmp', 'audit_logs.lock')
+ File.open(lockfile, File::RDWR|File::CREAT, 0600) do |f|
+ return unless f.flock(File::LOCK_NB|File::LOCK_EX)
+
+ sql = "select clock_timestamp() - interval '#{max_age} seconds'"
+ threshold = ActiveRecord::Base.connection.select_value(sql).to_time.utc
+ Rails.logger.info "AuditLogs: deleting logs older than #{threshold}"
+
+ did_total = 0
+ loop do
+ sql = Log.unscoped.
+ select(:id).
+ order(:created_at).
+ where('event_type in (?)', ['create', 'update', 'destroy', 'delete']).
+ where('created_at < ?', threshold).
+ limit(max_batch).
+ to_sql
+ did = Log.unscoped.where("id in (#{sql})").delete_all
+ did_total += did
+
+ Rails.logger.info "AuditLogs: deleted batch of #{did}"
+ break if did == 0
+ end
+ Rails.logger.info "AuditLogs: deleted total #{did_total}"
+ end
+ end
+ end
+
+ def self.tidy_in_background
+ max_age = Rails.configuration.max_audit_log_age
+ max_batch = Rails.configuration.max_audit_log_delete_batch
+ return if max_age <= 0 || max_batch <= 0
+
+ exp = (max_age/14).seconds
+ need = false
+ Rails.cache.fetch('AuditLogs', expires_in: exp) do
+ need = true
+ end
+ return if !need
+
+ Thread.new do
+ Thread.current.abort_on_exception = false
+ begin
+ delete_old(max_age: max_age, max_batch: max_batch)
+ rescue => e
+ Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
+ end
+end
diff --git a/services/api/test/fixtures/logs.yml b/services/api/test/fixtures/logs.yml
index d83cf96..a39aff5 100644
--- a/services/api/test/fixtures/logs.yml
+++ b/services/api/test/fixtures/logs.yml
@@ -13,6 +13,7 @@ admin_changes_repository2: # admin changes repository2, which is owned by active
object_uuid: zzzzz-2x53u-382brsig8rp3667 # repository foo
object_owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
event_at: <%= 2.minute.ago.to_s(:db) %>
+ event_type: update
admin_changes_specimen: # admin changes specimen owned_by_spectator
id: 3
@@ -21,6 +22,7 @@ admin_changes_specimen: # admin changes specimen owned_by_spectator
object_uuid: zzzzz-2x53u-3b0xxwzlbzxq5yr # specimen owned_by_spectator
object_owner_uuid: zzzzz-tpzed-l1s2piq4t4mps8r # spectator user
event_at: <%= 3.minute.ago.to_s(:db) %>
+ event_type: update
system_adds_foo_file: # foo collection added, readable by active through link
id: 4
@@ -29,6 +31,7 @@ system_adds_foo_file: # foo collection added, readable by active through link
object_uuid: zzzzz-4zz18-znfnqtbbv4spc3w # foo file
object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
event_at: <%= 4.minute.ago.to_s(:db) %>
+ event_type: create
system_adds_baz: # baz collection added, readable by active and spectator through group 'all users' group membership
id: 5
@@ -37,6 +40,7 @@ system_adds_baz: # baz collection added, readable by active and spectator throug
object_uuid: zzzzz-4zz18-y9vne9npefyxh8g # baz file
object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
event_at: <%= 5.minute.ago.to_s(:db) %>
+ event_type: create
log_owned_by_active:
id: 6
diff --git a/services/api/test/unit/log_test.rb b/services/api/test/unit/log_test.rb
index a1a19ca..370bec1 100644
--- a/services/api/test/unit/log_test.rb
+++ b/services/api/test/unit/log_test.rb
@@ -1,4 +1,5 @@
require 'test_helper'
+require 'audit_logs'
class LogTest < ActiveSupport::TestCase
include CurrentApiClient
@@ -311,4 +312,73 @@ class LogTest < ActiveSupport::TestCase
end
end
end
+
+ def assert_no_logs_deleted
+ logs_before = Log.unscoped.all.count
+ yield
+ assert_equal logs_before, Log.unscoped.all.count
+ end
+
+ def remaining_audit_logs
+ Log.unscoped.where('event_type in (?)', %w(create update destroy delete))
+ end
+
+ test 'retain old audit logs with default settings' do
+ assert_no_logs_deleted do
+ AuditLogs.delete_old(
+ max_age: Rails.configuration.max_audit_log_age,
+ max_batch: Rails.configuration.max_audit_log_delete_batch)
+ end
+ end
+
+ test 'retain old audit logs with batch=0' do
+ assert_no_logs_deleted do
+ AuditLogs.delete_old(max_age: 1, max_batch: 0)
+ end
+ end
+
+ test 'delete old audit logs with production settings' do
+ initial_log_count = Log.unscoped.all.count
+ AuditLogs.delete_old(max_age: 180, max_batch: 100000)
+ assert_operator remaining_audit_logs.count, :<, initial_log_count
+ end
+
+ test 'delete old audit logs in multiple batches' do
+ AuditLogs.delete_old(max_age: 0, max_batch: 2)
+ assert_equal [], remaining_audit_logs.collect(&:uuid)
+ end
+
+ test 'delete old audit logs in thread' do
+ begin
+ Rails.configuration.max_audit_log_age = 20
+ Rails.configuration.max_audit_log_delete_batch = 100000
+ Rails.cache.delete 'AuditLogs'
+ initial_log_count = Log.unscoped.all.count + 1
+ act_as_system_user do
+ Log.create!()
+ initial_log_count += 1
+ end
+ deadline = Time.now + 10
+ while remaining_audit_logs.count == initial_log_count
+ if Time.now > deadline
+ raise "timed out"
+ end
+ sleep 0.1
+ end
+ assert_operator remaining_audit_logs.count, :<, initial_log_count
+ ensure
+ # The test framework rolls back our transactions, but that
+ # doesn't undo the deletes we did from separate threads.
+ ActiveRecord::Base.connection.exec_query 'ROLLBACK'
+ Thread.new do
+ begin
+ dc = DatabaseController.new
+ dc.define_singleton_method :render do |*args| end
+ dc.reset
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end.join
+ end
+ end
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list