From c1298836a79c1f3734c95f87f11615daf70806e3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 15 Aug 2023 16:41:01 -0400 Subject: [PATCH] 20858: Enable 'exists' filter to use index See comment in services/api/db/migrate/20230815160000_jsonb_exists_functions.rb Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .../20230815160000_jsonb_exists_functions.rb | 50 +++++++++++++++++++ services/api/db/structure.sql | 21 +++++++- services/api/lib/record_filters.rb | 23 +++++---- 3 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 services/api/db/migrate/20230815160000_jsonb_exists_functions.rb diff --git a/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb b/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb new file mode 100644 index 0000000000..751babff1f --- /dev/null +++ b/services/api/db/migrate/20230815160000_jsonb_exists_functions.rb @@ -0,0 +1,50 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class JsonbExistsFunctions < ActiveRecord::Migration[5.2] + def up + + # Define functions for the "?" and "?&" operators. We can't use + # "?" and "?&" directly in ActiveRecord queries because "?" is + # used for parameter substitution. + # + # We used to use jsonb_exists() and jsonb_exists_all() but + # apparently Postgres associates indexes with operators but not + # with functions, so while a query using an operator can use the + # index, the equivalent clause using the function will always + # perform a full row scan. + # + # See ticket https://dev.arvados.org/issues/20858 for examples. + # + # As a workaround, we can define IMMUTABLE functions, which are + # directly inlined into the query, which then uses the index as + # intended. + # + # Huge shout out to this stack overflow post that explained what + # is going on and provides the workaround used here. + # + # https://dba.stackexchange.com/questions/90002/postgresql-operator-uses-index-but-underlying-function-does-not + + ActiveRecord::Base.connection.execute %{ +CREATE OR REPLACE FUNCTION jsonb_exists_inline_op(jsonb, text) +RETURNS bool +LANGUAGE sql +IMMUTABLE +AS $$SELECT $1 ? $2$$ +} + + ActiveRecord::Base.connection.execute %{ +CREATE OR REPLACE FUNCTION jsonb_exists_all_inline_op(jsonb, text[]) +RETURNS bool +LANGUAGE sql +IMMUTABLE +AS 'SELECT $1 ?& $2' +} + end + + def down + ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_inline_op" + ActiveRecord::Base.connection.execute "DROP FUNCTION jsonb_exists_all_inline_op" + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 59af57f75c..24c5ba3e46 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -255,6 +255,24 @@ select upd_container_uuid, upd_priority from tab; $$; +-- +-- Name: jsonb_exists_all_inline_op(jsonb, text[]); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.jsonb_exists_all_inline_op(jsonb, text[]) RETURNS boolean + LANGUAGE sql IMMUTABLE + AS $_$SELECT $1 ?& $2$_$; + + +-- +-- Name: jsonb_exists_inline_op(jsonb, text); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.jsonb_exists_inline_op(jsonb, text) RETURNS boolean + LANGUAGE sql IMMUTABLE + AS $_$SELECT $1 ? $2$_$; + + -- -- Name: project_subtree_with_is_frozen(character varying, boolean); Type: FUNCTION; Schema: public; Owner: - -- @@ -3270,6 +3288,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20221219165512'), ('20221230155924'), ('20230421142716'), -('20230503224107'); +('20230503224107'), +('20230815160000'); diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb index b15207b14e..e51223254f 100644 --- a/services/api/lib/record_filters.rb +++ b/services/api/lib/record_filters.rb @@ -121,9 +121,9 @@ module RecordFilters end when 'exists' if operand == true - cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)" + cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)" elsif operand == false - cond_out << "(NOT jsonb_exists(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL" + cond_out << "(NOT jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)) OR #{attr_table_name}.#{attr} is NULL" else raise ArgumentError.new("Invalid operand '#{operand}' for '#{operator}' must be true or false") end @@ -140,7 +140,7 @@ module RecordFilters raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter") end - cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)" + cond_out << "jsonb_exists_inline_op(#{attr_table_name}.#{attr}, ?)" param_out << operand elsif expr = /^ *\( *(\w+) *(<=?|>=?|=) *(\w+) *\) *$/.match(attr) if operator != '=' || ![true,"true"].index(operand) @@ -270,13 +270,18 @@ module RecordFilters raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)") end end - # We use jsonb_exists_all(a,b) instead of "a ?& b" because - # the pg gem thinks "?" is a bind var. And we use string - # interpolation instead of param_out because the pg gem - # flattens param_out / doesn't support passing arrays as - # bind vars. + # We use jsonb_exists_all_inline_op(a,b) instead of "a ?& + # b" because the pg gem thinks "?" is a bind var. + # + # See note in migration + # 20230815160000_jsonb_exists_functions about _inline_op + # functions. + # + # We use string interpolation instead of param_out + # because the pg gem flattens param_out / doesn't support + # passing arrays as bind vars. q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',') - cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])" + cond_out << "jsonb_exists_all_inline_op(#{attr_table_name}.#{attr}, array[#{q}])" else raise ArgumentError.new("Invalid operator '#{operator}'") end -- 2.30.2