[arvados] created: 2.7.0-2-g26510a8ee0

git repository hosting git at public.arvados.org
Mon Oct 2 21:16:57 UTC 2023


        at  26510a8ee080eac922abd6c981e2f077fe1a2f58 (commit)


commit 26510a8ee080eac922abd6c981e2f077fe1a2f58
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Oct 2 17:09:05 2023 -0400

    21030: Use a CTE instead of temporary table for trash and frozen
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/group.rb b/services/api/app/models/group.rb
index 5c0aeba589..0c9248b819 100644
--- a/services/api/app/models/group.rb
+++ b/services/api/app/models/group.rb
@@ -163,63 +163,70 @@ class Group < ArvadosModel
     #   Remove groups that don't belong from trash
     #   Add/update groups that do belong in the trash
 
-    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query(
-      "create temporary table #{temptable} on commit drop " +
-      "as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)",
+    frozen_descendants = ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp))
+      select uuid from frozen_groups, temptable where uuid = target_uuid
+},
       "Group.update_trash.select",
       [[nil, self.uuid],
        [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
        [nil, self.trash_at]])
-    frozen_descendants = ActiveRecord::Base.connection.exec_query(
-      "select uuid from frozen_groups, #{temptable} where uuid = target_uuid",
-      "Group.update_trash.check_frozen")
     if frozen_descendants.any?
       raise ArgumentError.new("cannot trash project containing frozen project #{frozen_descendants[0]["uuid"]}")
     end
-    ActiveRecord::Base.connection.exec_delete(
-      "delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL)",
-      "Group.update_trash.delete")
-    ActiveRecord::Base.connection.exec_query(
-      "insert into trashed_groups (group_uuid, trash_at) "+
-      "select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL " +
-      "on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at",
-      "Group.update_trash.insert")
-    ActiveRecord::Base.connection.exec_query(
-      "select container_uuid from container_requests where " +
-      "owner_uuid in (select target_uuid from #{temptable}) and " +
-      "requesting_container_uuid is NULL and state = 'Committed' and container_uuid is not NULL",
-      "Group.update_trash.update_priorities").each do |container_uuid|
+
+    ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)),
+
+delete_rows as (delete from trashed_groups where group_uuid in (select target_uuid from temptable where trash_at is NULL)),
+
+insert_rows as (insert into trashed_groups (group_uuid, trash_at)
+  select target_uuid as group_uuid, trash_at from temptable where trash_at is not NULL
+  on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at)
+
+select container_uuid from container_requests where
+  owner_uuid in (select target_uuid from temptable) and
+  requesting_container_uuid is NULL and state = 'Committed' and container_uuid is not NULL
+},
+      "Group.update_trash.select",
+      [[nil, self.uuid],
+       [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
+       [nil, self.trash_at]]).each do |container_uuid|
       update_priorities container_uuid["container_uuid"]
     end
   end
 
   def update_frozen
     return unless saved_change_to_frozen_by_uuid? || saved_change_to_owner_uuid?
-    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query(
-      "create temporary table #{temptable} on commit drop as select * from project_subtree_with_is_frozen($1,$2)",
-      "Group.update_frozen.select",
-      [[nil, self.uuid],
-       [nil, !self.frozen_by_uuid.nil?]])
+
     if frozen_by_uuid
-      rows = ActiveRecord::Base.connection.exec_query(
-        "select cr.uuid, cr.state from container_requests cr, #{temptable} frozen " +
-        "where cr.owner_uuid = frozen.uuid and frozen.is_frozen " +
-        "and cr.state not in ($1, $2) limit 1",
-        "Group.update_frozen.check_container_requests",
-        [[nil, ContainerRequest::Uncommitted],
-         [nil, ContainerRequest::Final]])
+      rows = ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_is_frozen($1,$2))
+
+select cr.uuid, cr.state from container_requests cr, temptable frozen
+  where cr.owner_uuid = frozen.uuid and frozen.is_frozen
+  and cr.state not in ($3, $4) limit 1
+},
+                                                      "Group.update_frozen.check_container_requests",
+                                                      [[nil, self.uuid],
+                                                       [nil, !self.frozen_by_uuid.nil?],
+                                                       [nil, ContainerRequest::Uncommitted],
+                                                       [nil, ContainerRequest::Final]])
       if rows.any?
         raise ArgumentError.new("cannot freeze project containing container request #{rows.first['uuid']} with state = #{rows.first['state']}")
       end
     end
-    ActiveRecord::Base.connection.exec_delete(
-      "delete from frozen_groups where uuid in (select uuid from #{temptable} where not is_frozen)",
-      "Group.update_frozen.delete")
-    ActiveRecord::Base.connection.exec_query(
-      "insert into frozen_groups (uuid) select uuid from #{temptable} where is_frozen on conflict do nothing",
-      "Group.update_frozen.insert")
+
+ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_is_frozen($1,$2)),
+
+delete_rows as (delete from frozen_groups where uuid in (select uuid from temptable where not is_frozen))
+
+insert into frozen_groups (uuid) select uuid from temptable where is_frozen on conflict do nothing
+}, "Group.update_frozen.update",
+                                         [[nil, self.uuid],
+                                          [nil, !self.frozen_by_uuid.nil?]])
+
   end
 
   def before_ownership_change

commit 841194f2456f9f4874fe6ebe1e4639c0b0421b97
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Oct 2 16:37:53 2023 -0400

    21030: Use a CTE instead of temporary table for update_permissions
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/lib/update_permissions.rb b/services/api/lib/update_permissions.rb
index b7e5476404..138d287f7f 100644
--- a/services/api/lib/update_permissions.rb
+++ b/services/api/lib/update_permissions.rb
@@ -69,75 +69,32 @@ def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil
     # parallel with a transaction holding this lock mode."
     ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in EXCLUSIVE MODE"
 
-    # Workaround for
-    # BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE
-    # https://www.postgresql.org/message-id/152395805004.19366.3107109716821067806@wrigleys.postgresql.org
-    #
-    # For a crucial join in the compute_permission_subgraph() query, the
-    # planner mis-estimates the number of rows in a Common Table
-    # Expression (CTE, this is a subquery in a WITH clause) and as a
-    # result it chooses the wrong join order.  The join starts with the
-    # permissions table because it mistakenly thinks
-    # count(materalized_permissions) < count(new computed permissions)
-    # when actually it is the other way around.
-    #
-    # Because of the incorrect join order, it choose the wrong join
-    # strategy (merge join, which works best when two tables are roughly
-    # the same size).  As a workaround, we can tell it not to use that
-    # join strategy, this causes it to pick hash join instead, which
-    # turns out to be a bit better.  However, because the join order is
-    # still wrong, we don't get the full benefit of the index.
-    #
-    # This is very unfortunate because it makes the query performance
-    # dependent on the size of the materalized_permissions table, when
-    # the goal of this design was to make permission updates scale-free
-    # and only depend on the number of permissions affected and not the
-    # total table size.  In several hours of researching I wasn't able
-    # to find a way to force the correct join order, so I'm calling it
-    # here and I have to move on.
-    #
-    # This is apparently addressed in Postgres 12, but I developed &
-    # tested this on Postgres 9.6, so in the future we should reevaluate
-    # the performance & query plan on Postgres 12.
-    #
-    # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f
-    #
-    # Disable merge join for just this query (also local for this transaction), then reenable it.
-    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;"
-
-    temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query %{
-create temporary table #{temptable_perms} on commit drop
-as select * from compute_permission_subgraph($1, $2, $3, $4)
-},
-                                             'update_permissions.select',
-                                             [[nil, perm_origin_uuid],
-                                              [nil, starting_uuid],
-                                              [nil, perm_level],
-                                              [nil, edge_id]]
-
-    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to true;"
-
-    # Now that we have recomputed a set of permissions, delete any
-    # rows from the materialized_permissions table where (target_uuid,
-    # user_uuid) is not present or has perm_level=0 in the recomputed
-    # set.
-    ActiveRecord::Base.connection.exec_delete %{
-delete from #{PERMISSION_VIEW} where
-  target_uuid in (select target_uuid from #{temptable_perms}) and
-  not exists (select 1 from #{temptable_perms}
-              where target_uuid=#{PERMISSION_VIEW}.target_uuid and
-                    user_uuid=#{PERMISSION_VIEW}.user_uuid and
-                    val>0)
-},
-                                              "update_permissions.delete"
-
-    # Now insert-or-update permissions in the recomputed set.  The
-    # WHERE clause is important to avoid redundantly updating rows
-    # that haven't actually changed.
     ActiveRecord::Base.connection.exec_query %{
+with temptable_perms as (
+  select * from compute_permission_subgraph($1, $2, $3, $4)),
+
+/*
+    Now that we have recomputed a set of permissions, delete any
+    rows from the materialized_permissions table where (target_uuid,
+    user_uuid) is not present or has perm_level=0 in the recomputed
+    set.
+*/
+delete_rows as (
+  delete from #{PERMISSION_VIEW} where
+    target_uuid in (select target_uuid from temptable_perms) and
+    not exists (select 1 from temptable_perms
+                where target_uuid=#{PERMISSION_VIEW}.target_uuid and
+                      user_uuid=#{PERMISSION_VIEW}.user_uuid and
+                      val>0)
+)
+
+/*
+  Now insert-or-update permissions in the recomputed set.  The
+  WHERE clause is important to avoid redundantly updating rows
+  that haven't actually changed.
+*/
 insert into #{PERMISSION_VIEW} (user_uuid, target_uuid, perm_level, traverse_owned)
-  select user_uuid, target_uuid, val as perm_level, traverse_owned from #{temptable_perms} where val>0
+  select user_uuid, target_uuid, val as perm_level, traverse_owned from temptable_perms where val>0
 on conflict (user_uuid, target_uuid) do update
 set perm_level=EXCLUDED.perm_level, traverse_owned=EXCLUDED.traverse_owned
 where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and
@@ -145,7 +102,11 @@ where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and
        (#{PERMISSION_VIEW}.perm_level != EXCLUDED.perm_level or
         #{PERMISSION_VIEW}.traverse_owned != EXCLUDED.traverse_owned);
 },
-                                             "update_permissions.insert"
+                                             'update_permissions.select',
+                                             [[nil, perm_origin_uuid],
+                                              [nil, starting_uuid],
+                                              [nil, perm_level],
+                                              [nil, edge_id]]
 
     if perm_level>0
       check_permissions_against_full_refresh

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list