20858: Enable 'exists' filter to use index 20858-jsonb-exists
authorPeter Amstutz <peter.amstutz@curii.com>
Tue, 15 Aug 2023 20:41:01 +0000 (16:41 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Tue, 15 Aug 2023 20:41:01 +0000 (16:41 -0400)
See comment in services/api/db/migrate/20230815160000_jsonb_exists_functions.rb

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

services/api/db/migrate/20230815160000_jsonb_exists_functions.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/record_filters.rb

diff --git a/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb b/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb
new file mode 100644 (file)
index 0000000..751babf
--- /dev/null
@@ -0,0 +1,50 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class JsonbExistsFunctions < ActiveRecord::Migration[5.2]
+  def up
+
+    # Define functions for the "?" and "?&" operators.  We can't use
+    # "?" and "?&" directly in ActiveRecord queries because "?" is
+    # used for parameter substitution.
+    #
+    # We used to use jsonb_exists() and jsonb_exists_all() but
+    # apparently Postgres associates indexes with operators but not
+    # with functions, so while a query using an operator can use the
+    # index, the equivalent clause using the function will always
+    # perform a full row scan.
+    #
+    # See ticket https://dev.arvados.org/issues/20858 for examples.
+    #
+    # As a workaround, we can define IMMUTABLE functions, which are
+    # directly inlined into the query, which then uses the index as
+    # intended.
+    #
+    # Huge shout out to this stack overflow post that explained what
+    # is going on and provides the workaround used here.
+    #
+    # https://dba.stackexchange.com/questions/90002/postgresql-operator-uses-index-but-underlying-function-does-not
+
+    ActiveRecord::Base.connection.execute %{
+CREATE OR REPLACE FUNCTION jsonb_exists_inline_op(jsonb, text)
+RETURNS bool
+LANGUAGE sql
+IMMUTABLE
+AS $$SELECT $1 ? $2$$
+}
+
+    ActiveRecord::Base.connection.execute %{
+CREATE OR REPLACE FUNCTION jsonb_exists_all_inline_op(jsonb, text[])
+RETURNS bool
+LANGUAGE sql
+IMMUTABLE
+AS 'SELECT $1 ?& $2'
+}
+  end
+
+  def down
+    ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_inline_op"
+    ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_all_inline_op"
+  end
+end
index 59af57f75cc2a8b545623b791677b125b377d2ae..24c5ba3e465fb191be0301b11922e86bb05f92c6 100644 (file)
@@ -255,6 +255,24 @@ select upd_container_uuid, upd_priority from tab;
 $$;
 
 
+--
+-- Name: jsonb_exists_all_inline_op(jsonb, text[]); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.jsonb_exists_all_inline_op(jsonb, text[]) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $_$SELECT $1 ?& $2$_$;
+
+
+--
+-- Name: jsonb_exists_inline_op(jsonb, text); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.jsonb_exists_inline_op(jsonb, text) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $_$SELECT $1 ? $2$_$;
+
+
 --
 -- Name: project_subtree_with_is_frozen(character varying, boolean); Type: FUNCTION; Schema: public; Owner: -
 --
@@ -3270,6 +3288,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20221219165512'),
 ('20221230155924'),
 ('20230421142716'),
-('20230503224107');
+('20230503224107'),
+('20230815160000');
 
 
index b15207b14e1720b53675e24a2473104bdd370af8..e51223254f7d21a34b5910e2f68a59abee4c22cb 100644 (file)
@@ -121,9 +121,9 @@ module RecordFilters
             end
           when 'exists'
             if operand == true
-              cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
+              cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)"
             elsif operand == false
-              cond_out << "(NOT jsonb_exists(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL"
+              cond_out << "(NOT jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL"
             else
               raise ArgumentError.new("Invalid operand '#{operand}' for '#{operator}' must be true or false")
             end
@@ -140,7 +140,7 @@ module RecordFilters
             raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter")
           end
 
-          cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
+          cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)"
           param_out << operand
         elsif expr = /^ *\( *(\w+) *(<=?|>=?|=) *(\w+) *\) *$/.match(attr)
           if operator != '=' || ![true,"true"].index(operand)
@@ -270,13 +270,18 @@ module RecordFilters
                 raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)")
               end
             end
-            # We use jsonb_exists_all(a,b) instead of "a ?& b" because
-            # the pg gem thinks "?" is a bind var. And we use string
-            # interpolation instead of param_out because the pg gem
-            # flattens param_out / doesn't support passing arrays as
-            # bind vars.
+            # We use jsonb_exists_all_inline_op(a,b) instead of "a ?&
+            # b" because the pg gem thinks "?" is a bind var.
+            #
+            # See note in migration
+            # 20230815160000_jsonb_exists_functions about _inline_op
+            # functions.
+            #
+            # We use string interpolation instead of param_out
+            # because the pg gem flattens param_out / doesn't support
+            # passing arrays as bind vars.
             q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',')
-            cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])"
+            cond_out << "jsonb_exists_all_inline_op(#{attr_table_name}.#{attr}, array[#{q}])"
           else
             raise ArgumentError.new("Invalid operator '#{operator}'")
           end