[ARVADOS] created: 1.1.1-213-g363fc63
Git user
git at public.curoverse.com
Mon Dec 11 14:22:38 EST 2017
at 363fc6395f866a9631f41630cbff49ac53e9a211 (commit)
commit 363fc6395f866a9631f41630cbff49ac53e9a211
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Mon Dec 11 11:54:52 2017 -0500
12511: readable_by filters on is_trashed directly
Fix performance problem in the negative join NOT EXISTS(...) used to filter out
trashed items for admin users. The clause
(uuid = target_uuid OR owner_uuid = target_uuid) doesn't use the index
efficiently and results in a very expensive seq scan.
However, the "OR" in the negative clause is not necessary. We only need
materialized_permission_view to know if "owner_uuid" is trashed (directly or
indirectly), not "uuid". If the record type has an is_trashed flag (currently
only collections and groups) then it is more efficient to filter on that
directly.
This commit refactors the readable_by query to only check owner_uuid, and
additionally filter on "is_trashed" for collections and groups.
As an additional cleanup, because "readable_by" now explicitly knows to filter
on "is_trashed" this makes the default_scope in Collections redundant. This
commit also removes default_scope & various uses of unscoped reduces complexity.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 3803d37..fb75007 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -28,7 +28,7 @@ class Arvados::V1::CollectionsController < ApplicationController
def find_objects_for_index
if params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name)
- @objects = Collection.readable_by(*@read_users, {include_trash: true, query_on: Collection.unscoped})
+ @objects = Collection.readable_by(*@read_users, {include_trash: true})
end
super
end
diff --git a/services/api/app/controllers/arvados/v1/groups_controller.rb b/services/api/app/controllers/arvados/v1/groups_controller.rb
index 8e195ff..ec3b69a 100644
--- a/services/api/app/controllers/arvados/v1/groups_controller.rb
+++ b/services/api/app/controllers/arvados/v1/groups_controller.rb
@@ -163,12 +163,7 @@ class Arvados::V1::GroupsController < ApplicationController
end
end.compact
- query_on = if klass == Collection and params[:include_trash]
- klass.unscoped
- else
- klass
- end
- @objects = query_on.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
+ @objects = klass.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
order(request_order).where(where_conds)
klass_limit = limit_all - all_objects.count
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 08d7e93..05deba7 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -255,36 +255,44 @@ class ArvadosModel < ActiveRecord::Base
# Collect the UUIDs of the authorized users.
sql_table = kwargs.fetch(:table_name, table_name)
include_trash = kwargs.fetch(:include_trash, false)
- query_on = kwargs.fetch(:query_on, self)
sql_conds = []
user_uuids = users_list.map { |u| u.uuid }
+ exclude_trashed_records = if !include_trash and (sql_table == "groups" or sql_table == "collections") then
+ # Only include records that are not explicitly trashed
+ "AND #{sql_table}.is_trashed = false"
+ else
+ ""
+ end
+
if users_list.select { |u| u.is_admin }.any?
if !include_trash
- # exclude rows that are explicitly trashed.
if sql_table != "api_client_authorizations"
- sql_conds.push "NOT EXISTS(SELECT 1
- FROM #{PERMISSION_VIEW}
- WHERE trashed = 1 AND
- (#{sql_table}.uuid = target_uuid OR #{sql_table}.owner_uuid = target_uuid))"
+ # Exclude rows where the owner is trashed
+ sql_conds.push "NOT EXISTS(SELECT 1 "+
+ "FROM #{PERMISSION_VIEW} "+
+ "WHERE trashed = 1 AND "+
+ "(#{sql_table}.owner_uuid = target_uuid)) "+
+ exclude_trashed_records
end
end
else
- if include_trash
- trashed_check = ""
- else
- trashed_check = "AND trashed = 0"
- end
-
- if sql_table != "api_client_authorizations" and sql_table != "groups"
- owner_check = "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
- else
- owner_check = ""
- end
+ trashed_check = if !include_trash then
+ "AND trashed = 0"
+ else
+ ""
+ end
+
+ owner_check = if sql_table != "api_client_authorizations" and sql_table != "groups" then
+ "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
+ else
+ ""
+ end
sql_conds.push "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check}))"
+ "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check})) "+
+ exclude_trashed_records
if sql_table == "links"
# Match any permission link that gives one of the authorized
@@ -295,7 +303,7 @@ class ArvadosModel < ActiveRecord::Base
end
end
- query_on.where(sql_conds.join(' OR '),
+ self.where(sql_conds.join(' OR '),
user_uuids: user_uuids,
permission_link_classes: ['permission', 'resources'])
end
@@ -734,7 +742,7 @@ class ArvadosModel < ActiveRecord::Base
if self == ArvadosModel
# If called directly as ArvadosModel.find_by_uuid rather than via subclass,
# delegate to the appropriate subclass based on the given uuid.
- self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
+ self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
else
super
end
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index ce7e9fe..c5fd96e 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -24,9 +24,6 @@ class Collection < ArvadosModel
validate :ensure_pdh_matches_manifest_text
before_save :set_file_names
- # Query only untrashed collections by default.
- default_scope { where("is_trashed = false") }
-
api_accessible :user, extend: :common do |t|
t.add :name
t.add :description
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index b3739da..edcb850 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -437,12 +437,10 @@ class Container < ArvadosModel
# that a container cannot "claim" a collection that it doesn't otherwise
# have access to just by setting the output field to the collection PDH.
if output_changed?
- c = Collection.unscoped do
- Collection.
- readable_by(current_user).
+ c = Collection.
+ readable_by(current_user, {include_trash: true}).
where(portable_data_hash: self.output).
first
- end
if !c
errors.add :output, "collection must exist and be readable by current user."
end
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 25becb2..3596bf3 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -120,9 +120,7 @@ class ContainerRequest < ArvadosModel
trash_at = db_current_time + self.output_ttl
end
end
- manifest = Collection.unscoped do
- Collection.where(portable_data_hash: pdh).first.manifest_text
- end
+ manifest = Collection.where(portable_data_hash: pdh).first.manifest_text
coll = Collection.new(owner_uuid: owner_uuid,
manifest_text: manifest,
diff --git a/services/api/lib/sweep_trashed_collections.rb b/services/api/lib/sweep_trashed_collections.rb
index 84497a1..a899191 100644
--- a/services/api/lib/sweep_trashed_collections.rb
+++ b/services/api/lib/sweep_trashed_collections.rb
@@ -9,10 +9,10 @@ module SweepTrashedCollections
def self.sweep_now
act_as_system_user do
- Collection.unscoped.
+ Collection.
where('delete_at is not null and delete_at < statement_timestamp()').
destroy_all
- Collection.unscoped.
+ Collection.
where('is_trashed = false and trash_at < statement_timestamp()').
update_all('is_trashed = true')
end
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 47f0887..e6ecea2 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -991,7 +991,7 @@ EOS
id: uuid,
}
assert_response 200
- c = Collection.unscoped.find_by_uuid(uuid)
+ c = Collection.find_by_uuid(uuid)
assert_operator c.trash_at, :<, db_current_time
assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
end
@@ -1003,7 +1003,7 @@ EOS
id: uuid,
}
assert_response 200
- c = Collection.unscoped.find_by_uuid(uuid)
+ c = Collection.find_by_uuid(uuid)
assert_operator c.trash_at, :<, db_current_time
assert_operator c.delete_at, :<, db_current_time
end
@@ -1023,7 +1023,7 @@ EOS
id: uuid,
}
assert_response 200
- c = Collection.unscoped.find_by_uuid(uuid)
+ c = Collection.find_by_uuid(uuid)
assert_operator c.trash_at, :<, db_current_time
assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
end
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index ba8f1e5..62e3755 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -352,9 +352,12 @@ class CollectionTest < ActiveSupport::TestCase
assert c.valid?
uuid = c.uuid
+ c = Collection.readable_by(current_user).where(uuid: uuid)
+ assert_not_empty c, 'Should be able to find live collection'
+
# mark collection as expired
- c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
- c = Collection.where(uuid: uuid)
+ c.first.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
+ c = Collection.readable_by(current_user).where(uuid: uuid)
assert_empty c, 'Should not be able to find expired collection'
# recreate collection with the same name
@@ -419,7 +422,7 @@ class CollectionTest < ActiveSupport::TestCase
if fixture_name == :expired_collection
# Fixture-finder shorthand doesn't find trashed collections
# because they're not in the default scope.
- c = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
+ c = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
else
c = collections(fixture_name)
end
@@ -488,7 +491,7 @@ class CollectionTest < ActiveSupport::TestCase
end
end
SweepTrashedCollections.sweep_now
- c = Collection.unscoped.where('uuid=? and is_trashed=true', c.uuid).first
+ c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
assert c
act_as_user users(:active) do
assert Collection.create!(owner_uuid: c.owner_uuid,
@@ -498,9 +501,9 @@ class CollectionTest < ActiveSupport::TestCase
test "delete in SweepTrashedCollections" do
uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
- assert_not_empty Collection.unscoped.where(uuid: uuid)
+ assert_not_empty Collection.where(uuid: uuid)
SweepTrashedCollections.sweep_now
- assert_empty Collection.unscoped.where(uuid: uuid)
+ assert_empty Collection.where(uuid: uuid)
end
test "delete referring links in SweepTrashedCollections" do
@@ -512,10 +515,10 @@ class CollectionTest < ActiveSupport::TestCase
name: 'something')
end
past = db_current_time
- Collection.unscoped.where(uuid: uuid).
+ Collection.where(uuid: uuid).
update_all(is_trashed: true, trash_at: past, delete_at: past)
- assert_not_empty Collection.unscoped.where(uuid: uuid)
+ assert_not_empty Collection.where(uuid: uuid)
SweepTrashedCollections.sweep_now
- assert_empty Collection.unscoped.where(uuid: uuid)
+ assert_empty Collection.where(uuid: uuid)
end
end
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index eb4f35f..3e2b8c1 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -596,7 +596,7 @@ class ContainerTest < ActiveSupport::TestCase
c.lock
c.update_attributes! state: Container::Running
- output = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jk')
+ output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jk')
assert output.is_trashed
assert c.update_attributes output: output.portable_data_hash
@@ -609,7 +609,7 @@ class ContainerTest < ActiveSupport::TestCase
c.lock
c.update_attributes! state: Container::Running
- output = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jr')
+ output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jr')
Thread.current[:api_client_authorization] = ApiClientAuthorization.find_by_uuid(c.auth_uuid)
Thread.current[:user] = User.find_by_id(Thread.current[:api_client_authorization].user_id)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list