[ARVADOS] created: 1.1.1-213-gc7a7340
Git user
git at public.curoverse.com
Mon Dec 11 12:17:58 EST 2017
at c7a7340b24d885f7e77ca61288b63a8afc09f58e (commit)
commit c7a7340b24d885f7e77ca61288b63a8afc09f58e
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Mon Dec 11 11:54:52 2017 -0500
12511: readable_by filters on is_trashed directly
Fix performance problem in the negative join NOT EXISTS(...) used to filter out
trashed items for admin users. The clause
(uuid = target_uuid OR owner_uuid = target_uuid) doesn't use the index
efficiently and results in a very expensive seq scan.
However, the "OR" in the negative clause is not necessary. We only need
materialized_permission_view to know if "owner_uuid" is trashed (directly or
indirectly), not "uuid". If the record type has an is_trashed flag (currently
only collections and groups) then it is more efficient to filter on that
directly.
This commit refactors the readable_by query to only check owner_uuid, and
additionally filter on "is_trashed" for collections and groups.
As an additional cleanup, because "readable_by" now explicitly knows to filter
on "is_trashed" this makes the default_scope in Collections redundant. This
commit also removes default_scope & various uses of unscoped reduces complexity.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 3803d37..fb75007 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -28,7 +28,7 @@ class Arvados::V1::CollectionsController < ApplicationController
def find_objects_for_index
if params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name)
- @objects = Collection.readable_by(*@read_users, {include_trash: true, query_on: Collection.unscoped})
+ @objects = Collection.readable_by(*@read_users, {include_trash: true})
end
super
end
diff --git a/services/api/app/controllers/arvados/v1/groups_controller.rb b/services/api/app/controllers/arvados/v1/groups_controller.rb
index 8e195ff..ec3b69a 100644
--- a/services/api/app/controllers/arvados/v1/groups_controller.rb
+++ b/services/api/app/controllers/arvados/v1/groups_controller.rb
@@ -163,12 +163,7 @@ class Arvados::V1::GroupsController < ApplicationController
end
end.compact
- query_on = if klass == Collection and params[:include_trash]
- klass.unscoped
- else
- klass
- end
- @objects = query_on.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
+ @objects = klass.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
order(request_order).where(where_conds)
klass_limit = limit_all - all_objects.count
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 08d7e93..e1c86bb 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -260,31 +260,40 @@ class ArvadosModel < ActiveRecord::Base
sql_conds = []
user_uuids = users_list.map { |u| u.uuid }
+ exclude_trashed_records = if !include_trash and (sql_table == "groups" or sql_table == "collections") then
+ # Only include records that are not explicitly trashed
+ "AND #{sql_table}.is_trashed = false"
+ else
+ ""
+ end
+
if users_list.select { |u| u.is_admin }.any?
if !include_trash
- # exclude rows that are explicitly trashed.
if sql_table != "api_client_authorizations"
- sql_conds.push "NOT EXISTS(SELECT 1
- FROM #{PERMISSION_VIEW}
- WHERE trashed = 1 AND
- (#{sql_table}.uuid = target_uuid OR #{sql_table}.owner_uuid = target_uuid))"
+ # Exclude rows where the owner is trashed
+ sql_conds.push "NOT EXISTS(SELECT 1 "+
+ "FROM #{PERMISSION_VIEW} "+
+ "WHERE trashed = 1 AND "+
+ "(#{sql_table}.owner_uuid = target_uuid)) "+
+ exclude_trashed_records
end
end
else
- if include_trash
- trashed_check = ""
- else
- trashed_check = "AND trashed = 0"
- end
-
- if sql_table != "api_client_authorizations" and sql_table != "groups"
- owner_check = "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
- else
- owner_check = ""
- end
+ trashed_check = if !include_trash then
+ "AND trashed = 0"
+ else
+ ""
+ end
+
+ owner_check = if sql_table != "api_client_authorizations" and sql_table != "groups" then
+ "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
+ else
+ ""
+ end
sql_conds.push "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check}))"
+ "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check})) "+
+ exclude_trashed_records
if sql_table == "links"
# Match any permission link that gives one of the authorized
@@ -734,7 +743,7 @@ class ArvadosModel < ActiveRecord::Base
if self == ArvadosModel
# If called directly as ArvadosModel.find_by_uuid rather than via subclass,
# delegate to the appropriate subclass based on the given uuid.
- self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
+ self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
else
super
end
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index ce7e9fe..c5fd96e 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -24,9 +24,6 @@ class Collection < ArvadosModel
validate :ensure_pdh_matches_manifest_text
before_save :set_file_names
- # Query only untrashed collections by default.
- default_scope { where("is_trashed = false") }
-
api_accessible :user, extend: :common do |t|
t.add :name
t.add :description
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index b3739da..edcb850 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -437,12 +437,10 @@ class Container < ArvadosModel
# that a container cannot "claim" a collection that it doesn't otherwise
# have access to just by setting the output field to the collection PDH.
if output_changed?
- c = Collection.unscoped do
- Collection.
- readable_by(current_user).
+ c = Collection.
+ readable_by(current_user, {include_trash: true}).
where(portable_data_hash: self.output).
first
- end
if !c
errors.add :output, "collection must exist and be readable by current user."
end
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 25becb2..3596bf3 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -120,9 +120,7 @@ class ContainerRequest < ArvadosModel
trash_at = db_current_time + self.output_ttl
end
end
- manifest = Collection.unscoped do
- Collection.where(portable_data_hash: pdh).first.manifest_text
- end
+ manifest = Collection.where(portable_data_hash: pdh).first.manifest_text
coll = Collection.new(owner_uuid: owner_uuid,
manifest_text: manifest,
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index 60fd88a..8f405c0 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -3039,3 +3039,4 @@ INSERT INTO schema_migrations (version) VALUES ('20170906224040');
INSERT INTO schema_migrations (version) VALUES ('20171027183824');
INSERT INTO schema_migrations (version) VALUES ('20171208203841');
+
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index ba8f1e5..4360811 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -352,9 +352,12 @@ class CollectionTest < ActiveSupport::TestCase
assert c.valid?
uuid = c.uuid
+ c = Collection.readable_by(current_user).where(uuid: uuid)
+ assert_not_empty c, 'Should be able to find live collection'
+
# mark collection as expired
- c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
- c = Collection.where(uuid: uuid)
+ c.first.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
+ c = Collection.readable_by(current_user).where(uuid: uuid)
assert_empty c, 'Should not be able to find expired collection'
# recreate collection with the same name
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list