[arvados] created: 2.6.0-504-gc1298836a

git repository hosting git at public.arvados.org
Tue Aug 15 20:42:51 UTC 2023


        at  c1298836a79c1f3734c95f87f11615daf70806e3 (commit)


commit c1298836a79c1f3734c95f87f11615daf70806e3
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Aug 15 16:41:01 2023 -0400

    20858: Enable 'exists' filter to use index
    
    See comment in services/api/db/migrate/20230815160000_jsonb_exists_functions.rb
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb b/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb
new file mode 100644
index 000000000..751babff1
--- /dev/null
+++ b/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb
@@ -0,0 +1,50 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class JsonbExistsFunctions < ActiveRecord::Migration[5.2]
+  def up
+
+    # Define functions for the "?" and "?&" operators.  We can't use
+    # "?" and "?&" directly in ActiveRecord queries because "?" is
+    # used for parameter substitution.
+    #
+    # We used to use jsonb_exists() and jsonb_exists_all() but
+    # apparently Postgres associates indexes with operators but not
+    # with functions, so while a query using an operator can use the
+    # index, the equivalent clause using the function will always
+    # perform a full row scan.
+    #
+    # See ticket https://dev.arvados.org/issues/20858 for examples.
+    #
+    # As a workaround, we can define IMMUTABLE functions, which are
+    # directly inlined into the query, which then uses the index as
+    # intended.
+    #
+    # Huge shout out to this stack overflow post that explained what
+    # is going on and provides the workaround used here.
+    #
+    # https://dba.stackexchange.com/questions/90002/postgresql-operator-uses-index-but-underlying-function-does-not
+
+    ActiveRecord::Base.connection.execute %{
+CREATE OR REPLACE FUNCTION jsonb_exists_inline_op(jsonb, text)
+RETURNS bool
+LANGUAGE sql
+IMMUTABLE
+AS $$SELECT $1 ? $2$$
+}
+
+    ActiveRecord::Base.connection.execute %{
+CREATE OR REPLACE FUNCTION jsonb_exists_all_inline_op(jsonb, text[])
+RETURNS bool
+LANGUAGE sql
+IMMUTABLE
+AS 'SELECT $1 ?& $2'
+}
+  end
+
+  def down
+    ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_inline_op"
+    ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_all_inline_op"
+  end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index 59af57f75..24c5ba3e4 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -255,6 +255,24 @@ select upd_container_uuid, upd_priority from tab;
 $$;
 
 
+--
+-- Name: jsonb_exists_all_inline_op(jsonb, text[]); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.jsonb_exists_all_inline_op(jsonb, text[]) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $_$SELECT $1 ?& $2$_$;
+
+
+--
+-- Name: jsonb_exists_inline_op(jsonb, text); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.jsonb_exists_inline_op(jsonb, text) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $_$SELECT $1 ? $2$_$;
+
+
 --
 -- Name: project_subtree_with_is_frozen(character varying, boolean); Type: FUNCTION; Schema: public; Owner: -
 --
@@ -3270,6 +3288,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20221219165512'),
 ('20221230155924'),
 ('20230421142716'),
-('20230503224107');
+('20230503224107'),
+('20230815160000');
 
 
diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb
index b15207b14..e51223254 100644
--- a/services/api/lib/record_filters.rb
+++ b/services/api/lib/record_filters.rb
@@ -121,9 +121,9 @@ module RecordFilters
             end
           when 'exists'
             if operand == true
-              cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
+              cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)"
             elsif operand == false
-              cond_out << "(NOT jsonb_exists(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL"
+              cond_out << "(NOT jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL"
             else
               raise ArgumentError.new("Invalid operand '#{operand}' for '#{operator}' must be true or false")
             end
@@ -140,7 +140,7 @@ module RecordFilters
             raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter")
           end
 
-          cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
+          cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)"
           param_out << operand
         elsif expr = /^ *\( *(\w+) *(<=?|>=?|=) *(\w+) *\) *$/.match(attr)
           if operator != '=' || ![true,"true"].index(operand)
@@ -270,13 +270,18 @@ module RecordFilters
                 raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)")
               end
             end
-            # We use jsonb_exists_all(a,b) instead of "a ?& b" because
-            # the pg gem thinks "?" is a bind var. And we use string
-            # interpolation instead of param_out because the pg gem
-            # flattens param_out / doesn't support passing arrays as
-            # bind vars.
+            # We use jsonb_exists_all_inline_op(a,b) instead of "a ?&
+            # b" because the pg gem thinks "?" is a bind var.
+            #
+            # See note in migration
+            # 20230815160000_jsonb_exists_functions about _inline_op
+            # functions.
+            #
+            # We use string interpolation instead of param_out
+            # because the pg gem flattens param_out / doesn't support
+            # passing arrays as bind vars.
             q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',')
-            cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])"
+            cond_out << "jsonb_exists_all_inline_op(#{attr_table_name}.#{attr}, array[#{q}])"
           else
             raise ArgumentError.new("Invalid operator '#{operator}'")
           end

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list