[ARVADOS] updated: 2.1.0-42-g4f9de1f00
Git user
git at public.arvados.org
Thu Nov 5 20:16:47 UTC 2020
Summary of changes:
.../api/app/controllers/application_controller.rb | 2 +-
services/api/app/models/arvados_model.rb | 65 +++++++++++++++-------
services/api/app/models/link.rb | 2 +
services/api/app/models/user.rb | 45 ++++++++++-----
.../20201103170213_refresh_trashed_groups.rb | 17 ++++++
.../migrate/20201105190435_refresh_permissions.rb | 15 +++++
services/api/db/structure.sql | 18 +-----
.../20200501150153_permission_table_constants.rb | 2 +-
services/api/lib/current_api_client.rb | 3 +
services/api/lib/update_permissions.rb | 10 ++--
services/api/test/test_helper.rb | 1 +
11 files changed, 126 insertions(+), 54 deletions(-)
create mode 100644 services/api/db/migrate/20201103170213_refresh_trashed_groups.rb
create mode 100644 services/api/db/migrate/20201105190435_refresh_permissions.rb
via 4f9de1f006ce329f76eb3c73a1b819b2ad60f67f (commit)
via b5daec3c57c4986ea78b48547b9070b8c2a3bdf9 (commit)
via a4aedfd86f4bc97267e890d49e6f829dc5e50e9c (commit)
via a16b34aabc167b456bb391151f9a27d9c201d950 (commit)
via a9b64985f573edb9a3a360afc245e32175faa5cc (commit)
via b2c7e3d09f2ee51ca1938c397f29e8627840e61e (commit)
via 5d994a7f96413195c9e19a71ae26a05ad3e9c910 (commit)
via 58b81c0ba0b949a18d4c0fdf772a414352ce4861 (commit)
from 8506a887dd99eea1f8e9c5c0f127db5d2b3197bb (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 4f9de1f006ce329f76eb3c73a1b819b2ad60f67f
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Nov 5 14:55:35 2020 -0500
17090: Migration to refresh_permissions
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/db/migrate/20201105190435_refresh_permissions.rb b/services/api/db/migrate/20201105190435_refresh_permissions.rb
new file mode 100644
index 000000000..1ced9d228
--- /dev/null
+++ b/services/api/db/migrate/20201105190435_refresh_permissions.rb
@@ -0,0 +1,15 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+class RefreshPermissions < ActiveRecord::Migration[5.2]
+ def change
+ # There was a report of deadlocks resulting in failing permission
+ # updates. These failures should not have corrupted permissions
+ # (the failure should have rolled back the entire update) but we
+ # will refresh the permissions out of an abundance of caution.
+ refresh_permissions
+ end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index 6f9cee270..58c064ac3 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -3185,6 +3185,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20200501150153'),
('20200602141328'),
('20200914203202'),
-('20201103170213');
+('20201103170213'),
+('20201105190435');
commit b5daec3c57c4986ea78b48547b9070b8c2a3bdf9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Nov 5 11:09:30 2020 -0500
17090: Use EXCLUSIVE lock for permission updates
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/lib/update_permissions.rb b/services/api/lib/update_permissions.rb
index 7b1b900ca..23e60c8ed 100644
--- a/services/api/lib/update_permissions.rb
+++ b/services/api/lib/update_permissions.rb
@@ -62,10 +62,12 @@ def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil
ActiveRecord::Base.transaction do
- # "Conflicts with the ROW EXCLUSIVE, SHARE UPDATE EXCLUSIVE, SHARE
- # ROW EXCLUSIVE, EXCLUSIVE, and ACCESS EXCLUSIVE lock modes. This
- # mode protects a table against concurrent data changes."
- ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in SHARE MODE"
+ # "Conflicts with the ROW SHARE, ROW EXCLUSIVE, SHARE UPDATE
+ # EXCLUSIVE, SHARE, SHARE ROW EXCLUSIVE, EXCLUSIVE, and ACCESS
+ # EXCLUSIVE lock modes. This mode allows only concurrent ACCESS
+ # SHARE locks, i.e., only reads from the table can proceed in
+ # parallel with a transaction holding this lock mode."
+ ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in EXCLUSIVE MODE"
# Workaround for
# BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE
commit a4aedfd86f4bc97267e890d49e6f829dc5e50e9c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Nov 3 13:01:57 2020 -0500
17040: Update structure.sql
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index a5740834c..6f9cee270 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -10,20 +10,6 @@ SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
---
--- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: -
---
-
-CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
-
-
---
--- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: -
---
-
--- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
-
-
--
-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: -
--
@@ -3198,6 +3184,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20190905151603'),
('20200501150153'),
('20200602141328'),
-('20200914203202');
+('20200914203202'),
+('20201103170213');
commit a16b34aabc167b456bb391151f9a27d9c201d950
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Nov 3 12:12:59 2020 -0500
17040: RefreshTrashedGroups migration, clear_permissions calls forget_cached_group_perms
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/link.rb b/services/api/app/models/link.rb
index eb6ff4c6b..83043a56d 100644
--- a/services/api/app/models/link.rb
+++ b/services/api/app/models/link.rb
@@ -143,6 +143,7 @@ class Link < ArvadosModel
def clear_permissions
if self.link_class == 'permission'
update_permissions tail_uuid, head_uuid, REVOKE_PERM, self.uuid
+ current_user.forget_cached_group_perms
end
end
diff --git a/services/api/db/migrate/20201103170213_refresh_trashed_groups.rb b/services/api/db/migrate/20201103170213_refresh_trashed_groups.rb
new file mode 100644
index 000000000..4e8c245c8
--- /dev/null
+++ b/services/api/db/migrate/20201103170213_refresh_trashed_groups.rb
@@ -0,0 +1,17 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+class RefreshTrashedGroups < ActiveRecord::Migration[5.2]
+ def change
+ # The original refresh_trashed query had a bug, it would insert
+ # all trashed rows, including those with null trash_at times.
+ # This went unnoticed because null trash_at behaved the same as
+ # not having those rows at all, but it is inefficient to fetch
+ # rows we'll never use. That bug is fixed in the original query
+ # but we need another migration to make sure it runs.
+ refresh_trashed
+ end
+end
commit a9b64985f573edb9a3a360afc245e32175faa5cc
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 2 16:20:02 2020 -0500
17040: Cache results of User.group_permissions
When requesting a list of groups (either directly, or with the
"shared" endpoint) it calls writable_by for each group. This
indirectly calls User.group_permissions, which makes a database query.
When it does this for every group (with a database query each time),
which gets very expensive.
To address this, this commit caches the result of group_permissions on
the user object.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/link.rb b/services/api/app/models/link.rb
index 0d7334e44..eb6ff4c6b 100644
--- a/services/api/app/models/link.rb
+++ b/services/api/app/models/link.rb
@@ -136,6 +136,7 @@ class Link < ArvadosModel
def call_update_permissions
if self.link_class == 'permission'
update_permissions tail_uuid, head_uuid, PERM_LEVEL[name], self.uuid
+ current_user.forget_cached_group_perms
end
end
diff --git a/services/api/app/models/user.rb b/services/api/app/models/user.rb
index 6f30b27a9..da7e7b310 100644
--- a/services/api/app/models/user.rb
+++ b/services/api/app/models/user.rb
@@ -161,6 +161,10 @@ SELECT 1 FROM #{PERMISSION_VIEW}
MaterializedPermission.where("user_uuid = ? and target_uuid != ?", uuid, uuid).delete_all
end
+ def forget_cached_group_perms
+ @group_perms = nil
+ end
+
def remove_self_from_permissions
MaterializedPermission.where("target_uuid = ?", uuid).delete_all
check_permissions_against_full_refresh
@@ -191,25 +195,35 @@ SELECT user_uuid, target_uuid, perm_level
# and perm_hash[:write] are true if this user can read and write
# objects owned by group_uuid.
def group_permissions(level=1)
- group_perms = {}
-
- user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}
+ @group_perms ||= {}
+ if @group_perms.empty?
+ user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: 1}
- ActiveRecord::Base.connection.
- exec_query(%{
+ ActiveRecord::Base.connection.
+ exec_query(%{
SELECT target_uuid, perm_level
FROM #{PERMISSION_VIEW}
- WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= $2
+ WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= 1
},
- # "name" arg is a query label that appears in logs:
- "User.group_permissions",
- # "binds" arg is an array of [col_id, value] for '$1' vars:
- [[nil, uuid],
- [nil, level]]).
- rows.each do |group_uuid, max_p_val|
- group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+ # "name" arg is a query label that appears in logs:
+ "User.group_permissions",
+ # "binds" arg is an array of [col_id, value] for '$1' vars:
+ [[nil, uuid]]).
+ rows.each do |group_uuid, max_p_val|
+ @group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+ end
+ end
+
+ case level
+ when 1
+ @group_perms
+ when 2
+ @group_perms.select {|k,v| v[:write] }
+ when 3
+ @group_perms.select {|k,v| v[:manage] }
+ else
+ raise "level must be 1, 2 or 3"
end
- group_perms
end
# create links
@@ -251,6 +265,8 @@ SELECT target_uuid, perm_level
end
end
+ forget_cached_group_perms
+
return [repo_perm, vm_login_perm, group_perm, self].compact
end
@@ -290,6 +306,7 @@ SELECT target_uuid, perm_level
# mark the user as inactive
self.is_admin = false # can't be admin and inactive
self.is_active = false
+ forget_cached_group_perms
self.save!
end
diff --git a/services/api/lib/current_api_client.rb b/services/api/lib/current_api_client.rb
index dc40f158e..37e86976c 100644
--- a/services/api/lib/current_api_client.rb
+++ b/services/api/lib/current_api_client.rb
@@ -149,6 +149,9 @@ module CurrentApiClient
yield
ensure
Thread.current[:user] = user_was
+ if user_was
+ user_was.forget_cached_group_perms
+ end
end
end
diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb
index 5dc77cb98..ee7dac4cd 100644
--- a/services/api/test/test_helper.rb
+++ b/services/api/test/test_helper.rb
@@ -123,6 +123,7 @@ class ActiveSupport::TestCase
def set_user_from_auth(auth_name)
client_auth = api_client_authorizations(auth_name)
+ client_auth.user.forget_cached_group_perms
Thread.current[:api_client_authorization] = client_auth
Thread.current[:api_client] = client_auth.api_client
Thread.current[:user] = client_auth.user
commit b2c7e3d09f2ee51ca1938c397f29e8627840e61e
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 2 14:25:28 2020 -0500
17040: Get user_uuids and embed them as a constant in the main query
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 1800e125d..e1ae76ed2 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -578,7 +578,7 @@ class ApplicationController < ActionController::Base
if @objects.respond_to? :except
list[:items_available] = @objects.
except(:limit).except(:offset).
- distinct.count(:id)
+ count(@distinct ? :id : '*')
end
when 'none'
else
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 86e946152..6a0a58f08 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -286,6 +286,7 @@ class ArvadosModel < ApplicationRecord
sql_conds = nil
user_uuids = users_list.map { |u| u.uuid }
+ all_user_uuids = []
# For details on how the trashed_groups table is constructed, see
# see db/migrate/20200501150153_permission_table.rb
@@ -319,12 +320,31 @@ class ArvadosModel < ApplicationRecord
# A user can have can_manage access to another user, this grants
# full access to all that user's stuff. To implement that we
# need to include those other users in the permission query.
- user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: ":user_uuids", perm_level: 1}
+
+ # This was previously implemented by embedding the subquery
+ # directly into the query, but it was discovered later that this
+ # causes the Postgres query planner to do silly things because
+ # the query heuristics assumed the subquery would have a lot
+ # more rows that it does, and choose a bad merge strategy. By
+ # doing the query here and embedding the result as a constant,
+ # Postgres also knows exactly how many items there are and can
+ # choose the right query strategy.
+ #
+ # (note: you could also do this with a temporary table, but that
+ # would require all every request be wrapped in a transaction,
+ # which is not currently the case).
+
+ all_user_uuids = ActiveRecord::Base.connection.exec_query %{
+#{USER_UUIDS_SUBQUERY_TEMPLATE % {user: "'#{user_uuids.join "', '"}'", perm_level: 1}}
+},
+ 'readable_by.user_uuids'
+
+ user_uuids_subquery = ":user_uuids"
# Note: it is possible to combine the direct_check and
- # owner_check into a single EXISTS() clause, however it turns
+ # owner_check into a single IN (SELECT) clause, however it turns
# out query optimizer doesn't like it and forces a sequential
- # table scan. Constructing the query with separate EXISTS()
+ # table scan. Constructing the query with separate IN (SELECT)
# clauses enables it to use the index.
#
# see issue 13208 for details.
@@ -353,6 +373,14 @@ class ArvadosModel < ApplicationRecord
if sql_table != "api_client_authorizations" and sql_table != "groups" then
owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
"WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
+
+ # We want to do owner_check before direct_check in the OR
+ # clause. The order of the OR clause isn't supposed to
+ # matter, but in practice, it does -- apparently in the
+ # absence of other hints, it uses the ordering from the query.
+ # For certain types of queries (like filtering on owner_uuid),
+ # every item will match the owner_check clause, so then
+ # Postgres will optimize out the direct_check entirely.
direct_check = " OR " + direct_check
end
@@ -379,7 +407,7 @@ class ArvadosModel < ApplicationRecord
end
self.where(sql_conds,
- user_uuids: user_uuids,
+ user_uuids: all_user_uuids.collect{|c| c["target_uuid"]},
permission_link_classes: ['permission', 'resources'])
end
commit 5d994a7f96413195c9e19a71ae26a05ad3e9c910
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Oct 26 22:55:46 2020 -0400
17040: Refactor trash check
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 37f96c3ff..86e946152 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -296,21 +296,19 @@ class ArvadosModel < ApplicationRecord
exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
end
+ trashed_check = ""
+ if !include_trash && sql_table != "api_client_authorizations"
+ trashed_check = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} " +
+ "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
+ end
+
if users_list.select { |u| u.is_admin }.any?
# Admin skips most permission checks, but still want to filter on trashed items.
- if !include_trash
- if sql_table != "api_client_authorizations"
- # Only include records where the owner is not trashed
- sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} "+
- "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
- end
+ if !include_trash && sql_table != "api_client_authorizations"
+ # Only include records where the owner is not trashed
+ sql_conds = trashed_check
end
else
- trashed_check = ""
- if !include_trash then
- trashed_check = "AND target_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} where trash_at <= statement_timestamp())"
- end
-
# The core of the permission check is a join against the
# materialized_permissions table to determine if the user has at
# least read permission to either the object itself or its
@@ -333,7 +331,7 @@ class ArvadosModel < ApplicationRecord
# Match a direct read permission link from the user to the record uuid
direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check})"
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1)"
# Match a read permission for the user to the record's
# owner_uuid. This is so we can have a permissions table that
@@ -354,7 +352,7 @@ class ArvadosModel < ApplicationRecord
owner_check = ""
if sql_table != "api_client_authorizations" and sql_table != "groups" then
owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
direct_check = " OR " + direct_check
end
@@ -367,7 +365,7 @@ class ArvadosModel < ApplicationRecord
"(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
end
- sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
+ sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{trashed_check.empty? ? "" : "AND"} #{trashed_check}"
end
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb
index 6e43a628c..74c15bc2e 100644
--- a/services/api/lib/20200501150153_permission_table_constants.rb
+++ b/services/api/lib/20200501150153_permission_table_constants.rb
@@ -63,7 +63,7 @@ def refresh_trashed
INSERT INTO #{TRASHED_GROUPS}
select ps.target_uuid as group_uuid, ps.trash_at from groups,
lateral project_subtree_with_trash_at(groups.uuid, groups.trash_at) ps
- where groups.owner_uuid like '_____-tpzed-_______________'
+ where groups.owner_uuid like '_____-tpzed-_______________' and ps.trash_at is not NULL
})
end
end
commit 58b81c0ba0b949a18d4c0fdf772a414352ce4861
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Thu Oct 22 20:44:15 2020 -0400
17040: Swap the order of where clauses in the readable_by query
This seems work around a query planner bug in Postgres 9.5.
A hotfix provided to a customer resulted in dramatically better
performance.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 3966b7c39..37f96c3ff 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -353,8 +353,9 @@ class ArvadosModel < ApplicationRecord
# other user owns.
owner_check = ""
if sql_table != "api_client_authorizations" and sql_table != "groups" then
- owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
- "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+ "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+ direct_check = " OR " + direct_check
end
links_cond = ""
@@ -366,7 +367,7 @@ class ArvadosModel < ApplicationRecord
"(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
end
- sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
+ sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
end
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list