[ARVADOS] created: 2.1.0-36-g5915604c6
Git user
git at public.arvados.org
Mon Nov 2 21:36:49 UTC 2020
at 5915604c6c8f2f45b540cfc7c21b28f5e165d1a9 (commit)
commit 5915604c6c8f2f45b540cfc7c21b28f5e165d1a9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 2 16:20:02 2020 -0500
17040: Cache results of User.group_permissions
When requesting a list of groups (either directly, or with the
"shared" endpoint) it calls writable_by for each group. This
indirectly calls User.group_permissions, which makes a database query.
When it does this for every group (with a database query each time),
which gets very expensive.
To address this, this commit caches the result of group_permissions on
the user object.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/link.rb b/services/api/app/models/link.rb
index 0d7334e44..eb6ff4c6b 100644
--- a/services/api/app/models/link.rb
+++ b/services/api/app/models/link.rb
@@ -136,6 +136,7 @@ class Link < ArvadosModel
def call_update_permissions
if self.link_class == 'permission'
update_permissions tail_uuid, head_uuid, PERM_LEVEL[name], self.uuid
+ current_user.forget_cached_group_perms
end
end
diff --git a/services/api/app/models/user.rb b/services/api/app/models/user.rb
index 6f30b27a9..da7e7b310 100644
--- a/services/api/app/models/user.rb
+++ b/services/api/app/models/user.rb
@@ -161,6 +161,10 @@ SELECT 1 FROM #{PERMISSION_VIEW}
MaterializedPermission.where("user_uuid = ? and target_uuid != ?", uuid, uuid).delete_all
end
+ def forget_cached_group_perms
+ @group_perms = nil
+ end
+
def remove_self_from_permissions
MaterializedPermission.where("target_uuid = ?", uuid).delete_all
check_permissions_against_full_refresh
@@ -191,25 +195,35 @@ SELECT user_uuid, target_uuid, perm_level
# and perm_hash[:write] are true if this user can read and write
# objects owned by group_uuid.
def group_permissions(level=1)
- group_perms = {}
-
- user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}
+ @group_perms ||= {}
+ if @group_perms.empty?
+ user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: 1}
- ActiveRecord::Base.connection.
- exec_query(%{
+ ActiveRecord::Base.connection.
+ exec_query(%{
SELECT target_uuid, perm_level
FROM #{PERMISSION_VIEW}
- WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= $2
+ WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= 1
},
- # "name" arg is a query label that appears in logs:
- "User.group_permissions",
- # "binds" arg is an array of [col_id, value] for '$1' vars:
- [[nil, uuid],
- [nil, level]]).
- rows.each do |group_uuid, max_p_val|
- group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+ # "name" arg is a query label that appears in logs:
+ "User.group_permissions",
+ # "binds" arg is an array of [col_id, value] for '$1' vars:
+ [[nil, uuid]]).
+ rows.each do |group_uuid, max_p_val|
+ @group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+ end
+ end
+
+ case level
+ when 1
+ @group_perms
+ when 2
+ @group_perms.select {|k,v| v[:write] }
+ when 3
+ @group_perms.select {|k,v| v[:manage] }
+ else
+ raise "level must be 1, 2 or 3"
end
- group_perms
end
# create links
@@ -251,6 +265,8 @@ SELECT target_uuid, perm_level
end
end
+ forget_cached_group_perms
+
return [repo_perm, vm_login_perm, group_perm, self].compact
end
@@ -290,6 +306,7 @@ SELECT target_uuid, perm_level
# mark the user as inactive
self.is_admin = false # can't be admin and inactive
self.is_active = false
+ forget_cached_group_perms
self.save!
end
diff --git a/services/api/lib/current_api_client.rb b/services/api/lib/current_api_client.rb
index dc40f158e..37e86976c 100644
--- a/services/api/lib/current_api_client.rb
+++ b/services/api/lib/current_api_client.rb
@@ -149,6 +149,9 @@ module CurrentApiClient
yield
ensure
Thread.current[:user] = user_was
+ if user_was
+ user_was.forget_cached_group_perms
+ end
end
end
diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb
index 5dc77cb98..ee7dac4cd 100644
--- a/services/api/test/test_helper.rb
+++ b/services/api/test/test_helper.rb
@@ -123,6 +123,7 @@ class ActiveSupport::TestCase
def set_user_from_auth(auth_name)
client_auth = api_client_authorizations(auth_name)
+ client_auth.user.forget_cached_group_perms
Thread.current[:api_client_authorization] = client_auth
Thread.current[:api_client] = client_auth.api_client
Thread.current[:user] = client_auth.user
commit 98c01ebae43ea31b211b4890d182684f6ce4e356
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 2 14:25:28 2020 -0500
17040: Get user_uuids and embed them as a constant in the main query
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 1800e125d..e1ae76ed2 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -578,7 +578,7 @@ class ApplicationController < ActionController::Base
if @objects.respond_to? :except
list[:items_available] = @objects.
except(:limit).except(:offset).
- distinct.count(:id)
+ count(@distinct ? :id : '*')
end
when 'none'
else
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 86e946152..6a0a58f08 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -286,6 +286,7 @@ class ArvadosModel < ApplicationRecord
sql_conds = nil
user_uuids = users_list.map { |u| u.uuid }
+ all_user_uuids = []
# For details on how the trashed_groups table is constructed, see
# see db/migrate/20200501150153_permission_table.rb
@@ -319,12 +320,31 @@ class ArvadosModel < ApplicationRecord
# A user can have can_manage access to another user, this grants
# full access to all that user's stuff. To implement that we
# need to include those other users in the permission query.
- user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: ":user_uuids", perm_level: 1}
+
+ # This was previously implemented by embedding the subquery
+ # directly into the query, but it was discovered later that this
+ # causes the Postgres query planner to do silly things because
+ # the query heuristics assumed the subquery would have a lot
+ # more rows that it does, and choose a bad merge strategy. By
+ # doing the query here and embedding the result as a constant,
+ # Postgres also knows exactly how many items there are and can
+ # choose the right query strategy.
+ #
+ # (note: you could also do this with a temporary table, but that
+ # would require all every request be wrapped in a transaction,
+ # which is not currently the case).
+
+ all_user_uuids = ActiveRecord::Base.connection.exec_query %{
+#{USER_UUIDS_SUBQUERY_TEMPLATE % {user: "'#{user_uuids.join "', '"}'", perm_level: 1}}
+},
+ 'readable_by.user_uuids'
+
+ user_uuids_subquery = ":user_uuids"
# Note: it is possible to combine the direct_check and
- # owner_check into a single EXISTS() clause, however it turns
+ # owner_check into a single IN (SELECT) clause, however it turns
# out query optimizer doesn't like it and forces a sequential
- # table scan. Constructing the query with separate EXISTS()
+ # table scan. Constructing the query with separate IN (SELECT)
# clauses enables it to use the index.
#
# see issue 13208 for details.
@@ -353,6 +373,14 @@ class ArvadosModel < ApplicationRecord
if sql_table != "api_client_authorizations" and sql_table != "groups" then
owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
"WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
+
+ # We want to do owner_check before direct_check in the OR
+ # clause. The order of the OR clause isn't supposed to
+ # matter, but in practice, it does -- apparently in the
+ # absence of other hints, it uses the ordering from the query.
+ # For certain types of queries (like filtering on owner_uuid),
+ # every item will match the owner_check clause, so then
+ # Postgres will optimize out the direct_check entirely.
direct_check = " OR " + direct_check
end
@@ -379,7 +407,7 @@ class ArvadosModel < ApplicationRecord
end
self.where(sql_conds,
- user_uuids: user_uuids,
+ user_uuids: all_user_uuids.collect{|c| c["target_uuid"]},
permission_link_classes: ['permission', 'resources'])
end
commit 9db0e491e43bdb3a0bd050ed9be395fd7c3b16dc
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Oct 26 22:55:46 2020 -0400
17040: Refactor trash check
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 37f96c3ff..86e946152 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -296,21 +296,19 @@ class ArvadosModel < ApplicationRecord
exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
end
+ trashed_check = ""
+ if !include_trash && sql_table != "api_client_authorizations"
+ trashed_check = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} " +
+ "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
+ end
+
if users_list.select { |u| u.is_admin }.any?
# Admin skips most permission checks, but still want to filter on trashed items.
- if !include_trash
- if sql_table != "api_client_authorizations"
- # Only include records where the owner is not trashed
- sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} "+
- "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
- end
+ if !include_trash && sql_table != "api_client_authorizations"
+ # Only include records where the owner is not trashed
+ sql_conds = trashed_check
end
else
- trashed_check = ""
- if !include_trash then
- trashed_check = "AND target_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} where trash_at <= statement_timestamp())"
- end
-
# The core of the permission check is a join against the
# materialized_permissions table to determine if the user has at
# least read permission to either the object itself or its
@@ -333,7 +331,7 @@ class ArvadosModel < ApplicationRecord
# Match a direct read permission link from the user to the record uuid
direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check})"
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1)"
# Match a read permission for the user to the record's
# owner_uuid. This is so we can have a permissions table that
@@ -354,7 +352,7 @@ class ArvadosModel < ApplicationRecord
owner_check = ""
if sql_table != "api_client_authorizations" and sql_table != "groups" then
owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
direct_check = " OR " + direct_check
end
@@ -367,7 +365,7 @@ class ArvadosModel < ApplicationRecord
"(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
end
- sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
+ sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{trashed_check.empty? ? "" : "AND"} #{trashed_check}"
end
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb
index 6e43a628c..74c15bc2e 100644
--- a/services/api/lib/20200501150153_permission_table_constants.rb
+++ b/services/api/lib/20200501150153_permission_table_constants.rb
@@ -63,7 +63,7 @@ def refresh_trashed
INSERT INTO #{TRASHED_GROUPS}
select ps.target_uuid as group_uuid, ps.trash_at from groups,
lateral project_subtree_with_trash_at(groups.uuid, groups.trash_at) ps
- where groups.owner_uuid like '_____-tpzed-_______________'
+ where groups.owner_uuid like '_____-tpzed-_______________' and ps.trash_at is not NULL
})
end
end
commit 7ab13e1ff6b899291feb82b80b0425f518762a8a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Oct 22 20:44:15 2020 -0400
17040: Swap the order of where clauses in the readable_by query
This seems work around a query planner bug in Postgres 9.5.
A hotfix provided to a customer resulted in dramatically better
performance.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 3966b7c39..37f96c3ff 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -353,8 +353,9 @@ class ArvadosModel < ApplicationRecord
# other user owns.
owner_check = ""
if sql_table != "api_client_authorizations" and sql_table != "groups" then
- owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ direct_check = " OR " + direct_check
end
links_cond = ""
@@ -366,7 +367,7 @@ class ArvadosModel < ApplicationRecord
"(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
end
- sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
+ sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list