[ARVADOS] created: 1.1.1-213-g363fc63

Git user git at public.curoverse.com
Mon Dec 11 14:22:38 EST 2017


        at  363fc6395f866a9631f41630cbff49ac53e9a211 (commit)


commit 363fc6395f866a9631f41630cbff49ac53e9a211
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Dec 11 11:54:52 2017 -0500

    12511: readable_by filters on is_trashed directly
    
    Fix performance problem in the negative join NOT EXISTS(...) used to filter out
    trashed items for admin users.  The clause
    (uuid = target_uuid OR owner_uuid = target_uuid) doesn't use the index
    efficiently and results in a very expensive seq scan.
    
    However, the "OR" in the negative clause is not necessary.  We only need
    materialized_permission_view to know if "owner_uuid" is trashed (directly or
    indirectly), not "uuid".  If the record type has an is_trashed flag (currently
    only collections and groups) then it is more efficient to filter on that
    directly.
    
    This commit refactors the readable_by query to only check owner_uuid, and
    additionally filter on "is_trashed" for collections and groups.
    
    As an additional cleanup, because "readable_by" now explicitly knows to filter
    on "is_trashed" this makes the default_scope in Collections redundant.  This
    commit also removes default_scope & various uses of unscoped reduces complexity.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 3803d37..fb75007 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -28,7 +28,7 @@ class Arvados::V1::CollectionsController < ApplicationController
 
   def find_objects_for_index
     if params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name)
-      @objects = Collection.readable_by(*@read_users, {include_trash: true, query_on: Collection.unscoped})
+      @objects = Collection.readable_by(*@read_users, {include_trash: true})
     end
     super
   end
diff --git a/services/api/app/controllers/arvados/v1/groups_controller.rb b/services/api/app/controllers/arvados/v1/groups_controller.rb
index 8e195ff..ec3b69a 100644
--- a/services/api/app/controllers/arvados/v1/groups_controller.rb
+++ b/services/api/app/controllers/arvados/v1/groups_controller.rb
@@ -163,12 +163,7 @@ class Arvados::V1::GroupsController < ApplicationController
         end
       end.compact
 
-      query_on = if klass == Collection and params[:include_trash]
-                   klass.unscoped
-                 else
-                   klass
-                 end
-      @objects = query_on.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
+      @objects = klass.readable_by(*@read_users, {:include_trash => params[:include_trash]}).
                  order(request_order).where(where_conds)
 
       klass_limit = limit_all - all_objects.count
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 08d7e93..05deba7 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -255,36 +255,44 @@ class ArvadosModel < ActiveRecord::Base
     # Collect the UUIDs of the authorized users.
     sql_table = kwargs.fetch(:table_name, table_name)
     include_trash = kwargs.fetch(:include_trash, false)
-    query_on = kwargs.fetch(:query_on, self)
 
     sql_conds = []
     user_uuids = users_list.map { |u| u.uuid }
 
+    exclude_trashed_records = if !include_trash and (sql_table == "groups" or sql_table == "collections") then
+                                # Only include records that are not explicitly trashed
+                                "AND #{sql_table}.is_trashed = false"
+                              else
+                                ""
+                              end
+
     if users_list.select { |u| u.is_admin }.any?
       if !include_trash
-        # exclude rows that are explicitly trashed.
         if sql_table != "api_client_authorizations"
-          sql_conds.push "NOT EXISTS(SELECT 1
-                  FROM #{PERMISSION_VIEW}
-                  WHERE trashed = 1 AND
-                  (#{sql_table}.uuid = target_uuid OR #{sql_table}.owner_uuid = target_uuid))"
+          # Exclude rows where the owner is trashed
+          sql_conds.push "NOT EXISTS(SELECT 1 "+
+                  "FROM #{PERMISSION_VIEW} "+
+                  "WHERE trashed = 1 AND "+
+                  "(#{sql_table}.owner_uuid = target_uuid)) "+
+                  exclude_trashed_records
         end
       end
     else
-      if include_trash
-        trashed_check = ""
-      else
-        trashed_check = "AND trashed = 0"
-      end
-
-      if sql_table != "api_client_authorizations" and sql_table != "groups"
-        owner_check = "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
-      else
-        owner_check = ""
-      end
+      trashed_check = if !include_trash then
+                        "AND trashed = 0"
+                      else
+                        ""
+                      end
+
+      owner_check = if sql_table != "api_client_authorizations" and sql_table != "groups" then
+                      "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
+                    else
+                      ""
+                    end
 
       sql_conds.push "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check}))"
+                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check})) "+
+                     exclude_trashed_records
 
       if sql_table == "links"
         # Match any permission link that gives one of the authorized
@@ -295,7 +303,7 @@ class ArvadosModel < ActiveRecord::Base
       end
     end
 
-    query_on.where(sql_conds.join(' OR '),
+    self.where(sql_conds.join(' OR '),
                     user_uuids: user_uuids,
                     permission_link_classes: ['permission', 'resources'])
   end
@@ -734,7 +742,7 @@ class ArvadosModel < ActiveRecord::Base
     if self == ArvadosModel
       # If called directly as ArvadosModel.find_by_uuid rather than via subclass,
       # delegate to the appropriate subclass based on the given uuid.
-      self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
+      self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
     else
       super
     end
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index ce7e9fe..c5fd96e 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -24,9 +24,6 @@ class Collection < ArvadosModel
   validate :ensure_pdh_matches_manifest_text
   before_save :set_file_names
 
-  # Query only untrashed collections by default.
-  default_scope { where("is_trashed = false") }
-
   api_accessible :user, extend: :common do |t|
     t.add :name
     t.add :description
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index b3739da..edcb850 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -437,12 +437,10 @@ class Container < ArvadosModel
     # that a container cannot "claim" a collection that it doesn't otherwise
     # have access to just by setting the output field to the collection PDH.
     if output_changed?
-      c = Collection.unscoped do
-        Collection.
-            readable_by(current_user).
+      c = Collection.
+            readable_by(current_user, {include_trash: true}).
             where(portable_data_hash: self.output).
             first
-      end
       if !c
         errors.add :output, "collection must exist and be readable by current user."
       end
diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 25becb2..3596bf3 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -120,9 +120,7 @@ class ContainerRequest < ArvadosModel
           trash_at = db_current_time + self.output_ttl
         end
       end
-      manifest = Collection.unscoped do
-        Collection.where(portable_data_hash: pdh).first.manifest_text
-      end
+      manifest = Collection.where(portable_data_hash: pdh).first.manifest_text
 
       coll = Collection.new(owner_uuid: owner_uuid,
                             manifest_text: manifest,
diff --git a/services/api/lib/sweep_trashed_collections.rb b/services/api/lib/sweep_trashed_collections.rb
index 84497a1..a899191 100644
--- a/services/api/lib/sweep_trashed_collections.rb
+++ b/services/api/lib/sweep_trashed_collections.rb
@@ -9,10 +9,10 @@ module SweepTrashedCollections
 
   def self.sweep_now
     act_as_system_user do
-      Collection.unscoped.
+      Collection.
         where('delete_at is not null and delete_at < statement_timestamp()').
         destroy_all
-      Collection.unscoped.
+      Collection.
         where('is_trashed = false and trash_at < statement_timestamp()').
         update_all('is_trashed = true')
     end
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index 47f0887..e6ecea2 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -991,7 +991,7 @@ EOS
       id: uuid,
     }
     assert_response 200
-    c = Collection.unscoped.find_by_uuid(uuid)
+    c = Collection.find_by_uuid(uuid)
     assert_operator c.trash_at, :<, db_current_time
     assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
   end
@@ -1003,7 +1003,7 @@ EOS
       id: uuid,
     }
     assert_response 200
-    c = Collection.unscoped.find_by_uuid(uuid)
+    c = Collection.find_by_uuid(uuid)
     assert_operator c.trash_at, :<, db_current_time
     assert_operator c.delete_at, :<, db_current_time
   end
@@ -1023,7 +1023,7 @@ EOS
         id: uuid,
       }
       assert_response 200
-      c = Collection.unscoped.find_by_uuid(uuid)
+      c = Collection.find_by_uuid(uuid)
       assert_operator c.trash_at, :<, db_current_time
       assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
     end
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index ba8f1e5..62e3755 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -352,9 +352,12 @@ class CollectionTest < ActiveSupport::TestCase
       assert c.valid?
       uuid = c.uuid
 
+      c = Collection.readable_by(current_user).where(uuid: uuid)
+      assert_not_empty c, 'Should be able to find live collection'
+
       # mark collection as expired
-      c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
-      c = Collection.where(uuid: uuid)
+      c.first.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
+      c = Collection.readable_by(current_user).where(uuid: uuid)
       assert_empty c, 'Should not be able to find expired collection'
 
       # recreate collection with the same name
@@ -419,7 +422,7 @@ class CollectionTest < ActiveSupport::TestCase
         if fixture_name == :expired_collection
           # Fixture-finder shorthand doesn't find trashed collections
           # because they're not in the default scope.
-          c = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
+          c = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
         else
           c = collections(fixture_name)
         end
@@ -488,7 +491,7 @@ class CollectionTest < ActiveSupport::TestCase
       end
     end
     SweepTrashedCollections.sweep_now
-    c = Collection.unscoped.where('uuid=? and is_trashed=true', c.uuid).first
+    c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
     assert c
     act_as_user users(:active) do
       assert Collection.create!(owner_uuid: c.owner_uuid,
@@ -498,9 +501,9 @@ class CollectionTest < ActiveSupport::TestCase
 
   test "delete in SweepTrashedCollections" do
     uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
-    assert_not_empty Collection.unscoped.where(uuid: uuid)
+    assert_not_empty Collection.where(uuid: uuid)
     SweepTrashedCollections.sweep_now
-    assert_empty Collection.unscoped.where(uuid: uuid)
+    assert_empty Collection.where(uuid: uuid)
   end
 
   test "delete referring links in SweepTrashedCollections" do
@@ -512,10 +515,10 @@ class CollectionTest < ActiveSupport::TestCase
                    name: 'something')
     end
     past = db_current_time
-    Collection.unscoped.where(uuid: uuid).
+    Collection.where(uuid: uuid).
       update_all(is_trashed: true, trash_at: past, delete_at: past)
-    assert_not_empty Collection.unscoped.where(uuid: uuid)
+    assert_not_empty Collection.where(uuid: uuid)
     SweepTrashedCollections.sweep_now
-    assert_empty Collection.unscoped.where(uuid: uuid)
+    assert_empty Collection.where(uuid: uuid)
   end
 end
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index eb4f35f..3e2b8c1 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -596,7 +596,7 @@ class ContainerTest < ActiveSupport::TestCase
     c.lock
     c.update_attributes! state: Container::Running
 
-    output = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jk')
+    output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jk')
 
     assert output.is_trashed
     assert c.update_attributes output: output.portable_data_hash
@@ -609,7 +609,7 @@ class ContainerTest < ActiveSupport::TestCase
     c.lock
     c.update_attributes! state: Container::Running
 
-    output = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jr')
+    output = Collection.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3jr')
 
     Thread.current[:api_client_authorization] = ApiClientAuthorization.find_by_uuid(c.auth_uuid)
     Thread.current[:user] = User.find_by_id(Thread.current[:api_client_authorization].user_id)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list