[ARVADOS] created: 2.1.0-36-g5915604c6

Git user git at public.arvados.org
Mon Nov 2 21:36:49 UTC 2020


        at  5915604c6c8f2f45b540cfc7c21b28f5e165d1a9 (commit)


commit 5915604c6c8f2f45b540cfc7c21b28f5e165d1a9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Nov 2 16:20:02 2020 -0500

    17040: Cache results of User.group_permissions
    
    When requesting a list of groups (either directly, or with the
    "shared" endpoint) it calls writable_by for each group.  This
    indirectly calls User.group_permissions, which makes a database query.
    When it does this for every group (with a database query each time),
    which gets very expensive.
    
    To address this, this commit caches the result of group_permissions on
    the user object.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/link.rb b/services/api/app/models/link.rb
index 0d7334e44..eb6ff4c6b 100644
--- a/services/api/app/models/link.rb
+++ b/services/api/app/models/link.rb
@@ -136,6 +136,7 @@ class Link < ArvadosModel
   def call_update_permissions
     if self.link_class == 'permission'
       update_permissions tail_uuid, head_uuid, PERM_LEVEL[name], self.uuid
+      current_user.forget_cached_group_perms
     end
   end
 
diff --git a/services/api/app/models/user.rb b/services/api/app/models/user.rb
index 6f30b27a9..da7e7b310 100644
--- a/services/api/app/models/user.rb
+++ b/services/api/app/models/user.rb
@@ -161,6 +161,10 @@ SELECT 1 FROM #{PERMISSION_VIEW}
     MaterializedPermission.where("user_uuid = ? and target_uuid != ?", uuid, uuid).delete_all
   end
 
+  def forget_cached_group_perms
+    @group_perms = nil
+  end
+
   def remove_self_from_permissions
     MaterializedPermission.where("target_uuid = ?", uuid).delete_all
     check_permissions_against_full_refresh
@@ -191,25 +195,35 @@ SELECT user_uuid, target_uuid, perm_level
   # and perm_hash[:write] are true if this user can read and write
   # objects owned by group_uuid.
   def group_permissions(level=1)
-    group_perms = {}
-
-    user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}
+    @group_perms ||= {}
+    if @group_perms.empty?
+      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: 1}
 
-    ActiveRecord::Base.connection.
-      exec_query(%{
+      ActiveRecord::Base.connection.
+        exec_query(%{
 SELECT target_uuid, perm_level
   FROM #{PERMISSION_VIEW}
-  WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= $2
+  WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= 1
 },
-                  # "name" arg is a query label that appears in logs:
-                  "User.group_permissions",
-                  # "binds" arg is an array of [col_id, value] for '$1' vars:
-                  [[nil, uuid],
-                   [nil, level]]).
-      rows.each do |group_uuid, max_p_val|
-      group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+                   # "name" arg is a query label that appears in logs:
+                   "User.group_permissions",
+                   # "binds" arg is an array of [col_id, value] for '$1' vars:
+                   [[nil, uuid]]).
+        rows.each do |group_uuid, max_p_val|
+        @group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+      end
+    end
+
+    case level
+    when 1
+      @group_perms
+    when 2
+      @group_perms.select {|k,v| v[:write] }
+    when 3
+      @group_perms.select {|k,v| v[:manage] }
+    else
+      raise "level must be 1, 2 or 3"
     end
-    group_perms
   end
 
   # create links
@@ -251,6 +265,8 @@ SELECT target_uuid, perm_level
       end
     end
 
+    forget_cached_group_perms
+
     return [repo_perm, vm_login_perm, group_perm, self].compact
   end
 
@@ -290,6 +306,7 @@ SELECT target_uuid, perm_level
     # mark the user as inactive
     self.is_admin = false  # can't be admin and inactive
     self.is_active = false
+    forget_cached_group_perms
     self.save!
   end
 
diff --git a/services/api/lib/current_api_client.rb b/services/api/lib/current_api_client.rb
index dc40f158e..37e86976c 100644
--- a/services/api/lib/current_api_client.rb
+++ b/services/api/lib/current_api_client.rb
@@ -149,6 +149,9 @@ module CurrentApiClient
       yield
     ensure
       Thread.current[:user] = user_was
+      if user_was
+        user_was.forget_cached_group_perms
+      end
     end
   end
 
diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb
index 5dc77cb98..ee7dac4cd 100644
--- a/services/api/test/test_helper.rb
+++ b/services/api/test/test_helper.rb
@@ -123,6 +123,7 @@ class ActiveSupport::TestCase
 
   def set_user_from_auth(auth_name)
     client_auth = api_client_authorizations(auth_name)
+    client_auth.user.forget_cached_group_perms
     Thread.current[:api_client_authorization] = client_auth
     Thread.current[:api_client] = client_auth.api_client
     Thread.current[:user] = client_auth.user

commit 98c01ebae43ea31b211b4890d182684f6ce4e356
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Nov 2 14:25:28 2020 -0500

    17040: Get user_uuids and embed them as a constant in the main query
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb
index 1800e125d..e1ae76ed2 100644
--- a/services/api/app/controllers/application_controller.rb
+++ b/services/api/app/controllers/application_controller.rb
@@ -578,7 +578,7 @@ class ApplicationController < ActionController::Base
       if @objects.respond_to? :except
         list[:items_available] = @objects.
           except(:limit).except(:offset).
-          distinct.count(:id)
+          count(@distinct ? :id : '*')
       end
     when 'none'
     else
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 86e946152..6a0a58f08 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -286,6 +286,7 @@ class ArvadosModel < ApplicationRecord
 
     sql_conds = nil
     user_uuids = users_list.map { |u| u.uuid }
+    all_user_uuids = []
 
     # For details on how the trashed_groups table is constructed, see
     # see db/migrate/20200501150153_permission_table.rb
@@ -319,12 +320,31 @@ class ArvadosModel < ApplicationRecord
       # A user can have can_manage access to another user, this grants
       # full access to all that user's stuff.  To implement that we
       # need to include those other users in the permission query.
-      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: ":user_uuids", perm_level: 1}
+
+      # This was previously implemented by embedding the subquery
+      # directly into the query, but it was discovered later that this
+      # causes the Postgres query planner to do silly things because
+      # the query heuristics assumed the subquery would have a lot
+      # more rows that it does, and choose a bad merge strategy.  By
+      # doing the query here and embedding the result as a constant,
+      # Postgres also knows exactly how many items there are and can
+      # choose the right query strategy.
+      #
+      # (note: you could also do this with a temporary table, but that
+      # would require all every request be wrapped in a transaction,
+      # which is not currently the case).
+
+      all_user_uuids = ActiveRecord::Base.connection.exec_query %{
+#{USER_UUIDS_SUBQUERY_TEMPLATE % {user: "'#{user_uuids.join "', '"}'", perm_level: 1}}
+},
+                                             'readable_by.user_uuids'
+
+      user_uuids_subquery = ":user_uuids"
 
       # Note: it is possible to combine the direct_check and
-      # owner_check into a single EXISTS() clause, however it turns
+      # owner_check into a single IN (SELECT) clause, however it turns
       # out query optimizer doesn't like it and forces a sequential
-      # table scan.  Constructing the query with separate EXISTS()
+      # table scan.  Constructing the query with separate IN (SELECT)
       # clauses enables it to use the index.
       #
       # see issue 13208 for details.
@@ -353,6 +373,14 @@ class ArvadosModel < ApplicationRecord
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
         owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
                       "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
+
+        # We want to do owner_check before direct_check in the OR
+        # clause.  The order of the OR clause isn't supposed to
+        # matter, but in practice, it does -- apparently in the
+        # absence of other hints, it uses the ordering from the query.
+        # For certain types of queries (like filtering on owner_uuid),
+        # every item will match the owner_check clause, so then
+        # Postgres will optimize out the direct_check entirely.
         direct_check = " OR " + direct_check
       end
 
@@ -379,7 +407,7 @@ class ArvadosModel < ApplicationRecord
     end
 
     self.where(sql_conds,
-               user_uuids: user_uuids,
+               user_uuids: all_user_uuids.collect{|c| c["target_uuid"]},
                permission_link_classes: ['permission', 'resources'])
   end
 

commit 9db0e491e43bdb3a0bd050ed9be395fd7c3b16dc
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Oct 26 22:55:46 2020 -0400

    17040: Refactor trash check
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 37f96c3ff..86e946152 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -296,21 +296,19 @@ class ArvadosModel < ApplicationRecord
       exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
     end
 
+    trashed_check = ""
+    if !include_trash && sql_table != "api_client_authorizations"
+      trashed_check = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} " +
+                      "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
+    end
+
     if users_list.select { |u| u.is_admin }.any?
       # Admin skips most permission checks, but still want to filter on trashed items.
-      if !include_trash
-        if sql_table != "api_client_authorizations"
-          # Only include records where the owner is not trashed
-          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} "+
-                      "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
-        end
+      if !include_trash && sql_table != "api_client_authorizations"
+        # Only include records where the owner is not trashed
+        sql_conds = trashed_check
       end
     else
-      trashed_check = ""
-      if !include_trash then
-        trashed_check = "AND target_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} where trash_at <= statement_timestamp())"
-      end
-
       # The core of the permission check is a join against the
       # materialized_permissions table to determine if the user has at
       # least read permission to either the object itself or its
@@ -333,7 +331,7 @@ class ArvadosModel < ApplicationRecord
 
       # Match a direct read permission link from the user to the record uuid
       direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check})"
+                     "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1)"
 
       # Match a read permission for the user to the record's
       # owner_uuid.  This is so we can have a permissions table that
@@ -354,7 +352,7 @@ class ArvadosModel < ApplicationRecord
       owner_check = ""
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
         owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                      "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+                      "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 AND traverse_owned) "
         direct_check = " OR " + direct_check
       end
 
@@ -367,7 +365,7 @@ class ArvadosModel < ApplicationRecord
                        "(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
       end
 
-      sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
+      sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{trashed_check.empty? ? "" : "AND"} #{trashed_check}"
 
     end
 
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb
index 6e43a628c..74c15bc2e 100644
--- a/services/api/lib/20200501150153_permission_table_constants.rb
+++ b/services/api/lib/20200501150153_permission_table_constants.rb
@@ -63,7 +63,7 @@ def refresh_trashed
 INSERT INTO #{TRASHED_GROUPS}
 select ps.target_uuid as group_uuid, ps.trash_at from groups,
   lateral project_subtree_with_trash_at(groups.uuid, groups.trash_at) ps
-  where groups.owner_uuid like '_____-tpzed-_______________'
+  where groups.owner_uuid like '_____-tpzed-_______________' and ps.trash_at is not NULL
 })
   end
 end

commit 7ab13e1ff6b899291feb82b80b0425f518762a8a
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Oct 22 20:44:15 2020 -0400

    17040: Swap the order of where clauses in the readable_by query
    
    This seems work around a query planner bug in Postgres 9.5.
    
    A hotfix provided to a customer resulted in dramatically better
    performance.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 3966b7c39..37f96c3ff 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -353,8 +353,9 @@ class ArvadosModel < ApplicationRecord
       # other user owns.
       owner_check = ""
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
-        owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-          "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+        owner_check = "#{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+                      "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
+        direct_check = " OR " + direct_check
       end
 
       links_cond = ""
@@ -366,7 +367,7 @@ class ArvadosModel < ApplicationRecord
                        "(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
       end
 
-      sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
+      sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{exclude_trashed_records}"
 
     end
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list