From 841194f2456f9f4874fe6ebe1e4639c0b0421b97 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 2 Oct 2023 16:37:53 -0400 Subject: [PATCH] 21030: Use a CTE instead of temporary table for update_permissions Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- services/api/lib/update_permissions.rb | 97 ++++++++------------------ 1 file changed, 29 insertions(+), 68 deletions(-) diff --git a/services/api/lib/update_permissions.rb b/services/api/lib/update_permissions.rb index b7e5476404..138d287f7f 100644 --- a/services/api/lib/update_permissions.rb +++ b/services/api/lib/update_permissions.rb @@ -69,75 +69,32 @@ def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil # parallel with a transaction holding this lock mode." ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in EXCLUSIVE MODE" - # Workaround for - # BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE - # https://www.postgresql.org/message-id/152395805004.19366.3107109716821067806@wrigleys.postgresql.org - # - # For a crucial join in the compute_permission_subgraph() query, the - # planner mis-estimates the number of rows in a Common Table - # Expression (CTE, this is a subquery in a WITH clause) and as a - # result it chooses the wrong join order. The join starts with the - # permissions table because it mistakenly thinks - # count(materalized_permissions) < count(new computed permissions) - # when actually it is the other way around. - # - # Because of the incorrect join order, it choose the wrong join - # strategy (merge join, which works best when two tables are roughly - # the same size). As a workaround, we can tell it not to use that - # join strategy, this causes it to pick hash join instead, which - # turns out to be a bit better. However, because the join order is - # still wrong, we don't get the full benefit of the index. - # - # This is very unfortunate because it makes the query performance - # dependent on the size of the materalized_permissions table, when - # the goal of this design was to make permission updates scale-free - # and only depend on the number of permissions affected and not the - # total table size. In several hours of researching I wasn't able - # to find a way to force the correct join order, so I'm calling it - # here and I have to move on. - # - # This is apparently addressed in Postgres 12, but I developed & - # tested this on Postgres 9.6, so in the future we should reevaluate - # the performance & query plan on Postgres 12. - # - # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f - # - # Disable merge join for just this query (also local for this transaction), then reenable it. - ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;" - - temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}" - ActiveRecord::Base.connection.exec_query %{ -create temporary table #{temptable_perms} on commit drop -as select * from compute_permission_subgraph($1, $2, $3, $4) -}, - 'update_permissions.select', - [[nil, perm_origin_uuid], - [nil, starting_uuid], - [nil, perm_level], - [nil, edge_id]] - - ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to true;" - - # Now that we have recomputed a set of permissions, delete any - # rows from the materialized_permissions table where (target_uuid, - # user_uuid) is not present or has perm_level=0 in the recomputed - # set. - ActiveRecord::Base.connection.exec_delete %{ -delete from #{PERMISSION_VIEW} where - target_uuid in (select target_uuid from #{temptable_perms}) and - not exists (select 1 from #{temptable_perms} - where target_uuid=#{PERMISSION_VIEW}.target_uuid and - user_uuid=#{PERMISSION_VIEW}.user_uuid and - val>0) -}, - "update_permissions.delete" - - # Now insert-or-update permissions in the recomputed set. The - # WHERE clause is important to avoid redundantly updating rows - # that haven't actually changed. ActiveRecord::Base.connection.exec_query %{ +with temptable_perms as ( + select * from compute_permission_subgraph($1, $2, $3, $4)), + +/* + Now that we have recomputed a set of permissions, delete any + rows from the materialized_permissions table where (target_uuid, + user_uuid) is not present or has perm_level=0 in the recomputed + set. +*/ +delete_rows as ( + delete from #{PERMISSION_VIEW} where + target_uuid in (select target_uuid from temptable_perms) and + not exists (select 1 from temptable_perms + where target_uuid=#{PERMISSION_VIEW}.target_uuid and + user_uuid=#{PERMISSION_VIEW}.user_uuid and + val>0) +) + +/* + Now insert-or-update permissions in the recomputed set. The + WHERE clause is important to avoid redundantly updating rows + that haven't actually changed. +*/ insert into #{PERMISSION_VIEW} (user_uuid, target_uuid, perm_level, traverse_owned) - select user_uuid, target_uuid, val as perm_level, traverse_owned from #{temptable_perms} where val>0 + select user_uuid, target_uuid, val as perm_level, traverse_owned from temptable_perms where val>0 on conflict (user_uuid, target_uuid) do update set perm_level=EXCLUDED.perm_level, traverse_owned=EXCLUDED.traverse_owned where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and @@ -145,7 +102,11 @@ where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and (#{PERMISSION_VIEW}.perm_level != EXCLUDED.perm_level or #{PERMISSION_VIEW}.traverse_owned != EXCLUDED.traverse_owned); }, - "update_permissions.insert" + 'update_permissions.select', + [[nil, perm_origin_uuid], + [nil, starting_uuid], + [nil, perm_level], + [nil, edge_id]] if perm_level>0 check_permissions_against_full_refresh -- 2.30.2