Merge branch '21030-update-perm-cte' refs #21030
authorPeter Amstutz <peter.amstutz@curii.com>
Thu, 19 Oct 2023 18:26:44 +0000 (14:26 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Thu, 19 Oct 2023 18:26:44 +0000 (14:26 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

services/api/app/models/group.rb
services/api/db/migrate/20231013000000_compute_permission_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/update_permissions.rb

index 5c0aeba589aa65867bd30c48dd56fd9a8fce3193..0c9248b819e81091867b126d20f5c2797e0d8e64 100644 (file)
@@ -163,63 +163,70 @@ class Group < ArvadosModel
     #   Remove groups that don't belong from trash
     #   Add/update groups that do belong in the trash
 
-    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query(
-      "create temporary table #{temptable} on commit drop " +
-      "as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)",
+    frozen_descendants = ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp))
+      select uuid from frozen_groups, temptable where uuid = target_uuid
+},
       "Group.update_trash.select",
       [[nil, self.uuid],
        [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
        [nil, self.trash_at]])
-    frozen_descendants = ActiveRecord::Base.connection.exec_query(
-      "select uuid from frozen_groups, #{temptable} where uuid = target_uuid",
-      "Group.update_trash.check_frozen")
     if frozen_descendants.any?
       raise ArgumentError.new("cannot trash project containing frozen project #{frozen_descendants[0]["uuid"]}")
     end
-    ActiveRecord::Base.connection.exec_delete(
-      "delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL)",
-      "Group.update_trash.delete")
-    ActiveRecord::Base.connection.exec_query(
-      "insert into trashed_groups (group_uuid, trash_at) "+
-      "select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL " +
-      "on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at",
-      "Group.update_trash.insert")
-    ActiveRecord::Base.connection.exec_query(
-      "select container_uuid from container_requests where " +
-      "owner_uuid in (select target_uuid from #{temptable}) and " +
-      "requesting_container_uuid is NULL and state = 'Committed' and container_uuid is not NULL",
-      "Group.update_trash.update_priorities").each do |container_uuid|
+
+    ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)),
+
+delete_rows as (delete from trashed_groups where group_uuid in (select target_uuid from temptable where trash_at is NULL)),
+
+insert_rows as (insert into trashed_groups (group_uuid, trash_at)
+  select target_uuid as group_uuid, trash_at from temptable where trash_at is not NULL
+  on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at)
+
+select container_uuid from container_requests where
+  owner_uuid in (select target_uuid from temptable) and
+  requesting_container_uuid is NULL and state = 'Committed' and container_uuid is not NULL
+},
+      "Group.update_trash.select",
+      [[nil, self.uuid],
+       [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
+       [nil, self.trash_at]]).each do |container_uuid|
       update_priorities container_uuid["container_uuid"]
     end
   end
 
   def update_frozen
     return unless saved_change_to_frozen_by_uuid? || saved_change_to_owner_uuid?
-    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query(
-      "create temporary table #{temptable} on commit drop as select * from project_subtree_with_is_frozen($1,$2)",
-      "Group.update_frozen.select",
-      [[nil, self.uuid],
-       [nil, !self.frozen_by_uuid.nil?]])
+
     if frozen_by_uuid
-      rows = ActiveRecord::Base.connection.exec_query(
-        "select cr.uuid, cr.state from container_requests cr, #{temptable} frozen " +
-        "where cr.owner_uuid = frozen.uuid and frozen.is_frozen " +
-        "and cr.state not in ($1, $2) limit 1",
-        "Group.update_frozen.check_container_requests",
-        [[nil, ContainerRequest::Uncommitted],
-         [nil, ContainerRequest::Final]])
+      rows = ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_is_frozen($1,$2))
+
+select cr.uuid, cr.state from container_requests cr, temptable frozen
+  where cr.owner_uuid = frozen.uuid and frozen.is_frozen
+  and cr.state not in ($3, $4) limit 1
+},
+                                                      "Group.update_frozen.check_container_requests",
+                                                      [[nil, self.uuid],
+                                                       [nil, !self.frozen_by_uuid.nil?],
+                                                       [nil, ContainerRequest::Uncommitted],
+                                                       [nil, ContainerRequest::Final]])
       if rows.any?
         raise ArgumentError.new("cannot freeze project containing container request #{rows.first['uuid']} with state = #{rows.first['state']}")
       end
     end
-    ActiveRecord::Base.connection.exec_delete(
-      "delete from frozen_groups where uuid in (select uuid from #{temptable} where not is_frozen)",
-      "Group.update_frozen.delete")
-    ActiveRecord::Base.connection.exec_query(
-      "insert into frozen_groups (uuid) select uuid from #{temptable} where is_frozen on conflict do nothing",
-      "Group.update_frozen.insert")
+
+ActiveRecord::Base.connection.exec_query(%{
+with temptable as (select * from project_subtree_with_is_frozen($1,$2)),
+
+delete_rows as (delete from frozen_groups where uuid in (select uuid from temptable where not is_frozen))
+
+insert into frozen_groups (uuid) select uuid from temptable where is_frozen on conflict do nothing
+}, "Group.update_frozen.update",
+                                         [[nil, self.uuid],
+                                          [nil, !self.frozen_by_uuid.nil?]])
+
   end
 
   def before_ownership_change
diff --git a/services/api/db/migrate/20231013000000_compute_permission_index.rb b/services/api/db/migrate/20231013000000_compute_permission_index.rb
new file mode 100644 (file)
index 0000000..ecd85ef
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class ComputePermissionIndex < ActiveRecord::Migration[5.2]
+  def up
+    # The inner part of compute_permission_subgraph has a query clause like this:
+    #
+    #    where u.perm_origin_uuid = m.target_uuid AND m.traverse_owned
+    #         AND (m.user_uuid = m.target_uuid or m.target_uuid not like '_____-tpzed-_______________')
+    #
+    # This will end up doing a sequential scan on
+    # materialized_permissions, which can easily have millions of
+    # rows, unless we fully index the table for this query.  In one test,
+    # this brought the compute_permission_subgraph query from over 6
+    # seconds down to 250ms.
+    #
+    ActiveRecord::Base.connection.execute "drop index if exists index_materialized_permissions_target_is_not_user"
+    ActiveRecord::Base.connection.execute %{
+create index index_materialized_permissions_target_is_not_user on materialized_permissions (target_uuid, traverse_owned, (user_uuid = target_uuid or target_uuid not like '_____-tpzed-_______________'));
+}
+  end
+
+  def down
+    ActiveRecord::Base.connection.execute "drop index if exists index_materialized_permissions_target_is_not_user"
+  end
+end
index a26db2e5dbd659b73ee414aaf7768f4fb4a8e983..c0d4263d97aa6bdde258688d091b5cf69fdd47af 100644 (file)
@@ -62,10 +62,10 @@ with
      permission (permission origin is self).
   */
   perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
-    
+
 WITH RECURSIVE
         traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
-            
+
              values (perm_origin_uuid, starting_uuid, starting_perm,
                     should_traverse_owned(starting_uuid, starting_perm),
                     (perm_origin_uuid = starting_uuid or starting_uuid not like '_____-tpzed-_______________'))
@@ -107,10 +107,10 @@ case (edges.edge_id = perm_edge_id)
        can_manage permission granted by ownership.
   */
   additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
-    
+
 WITH RECURSIVE
         traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
-            
+
     select edges.tail_uuid as origin_uuid, edges.head_uuid as target_uuid, edges.val,
            should_traverse_owned(edges.head_uuid, edges.val),
            edges.head_uuid like '_____-j7d0g-_______________'
@@ -342,30 +342,6 @@ SET default_tablespace = '';
 
 SET default_with_oids = false;
 
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
-    id bigint NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255),
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    name character varying(255) NOT NULL,
-    description character varying(524288),
-    updated_at timestamp without time zone NOT NULL,
-    group_class character varying(255),
-    trash_at timestamp without time zone,
-    is_trashed boolean DEFAULT false NOT NULL,
-    delete_at timestamp without time zone,
-    properties jsonb DEFAULT '{}'::jsonb,
-    frozen_by_uuid character varying
-);
-
-
 --
 -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
 --
@@ -690,6 +666,30 @@ CREATE TABLE public.frozen_groups (
 );
 
 
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+    id bigint NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255),
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    name character varying(255) NOT NULL,
+    description character varying(524288),
+    updated_at timestamp without time zone NOT NULL,
+    group_class character varying(255),
+    trash_at timestamp without time zone,
+    is_trashed boolean DEFAULT false NOT NULL,
+    delete_at timestamp without time zone,
+    properties jsonb DEFAULT '{}'::jsonb,
+    frozen_by_uuid character varying
+);
+
+
 --
 -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -2590,6 +2590,13 @@ CREATE INDEX index_logs_on_summary ON public.logs USING btree (summary);
 CREATE UNIQUE INDEX index_logs_on_uuid ON public.logs USING btree (uuid);
 
 
+--
+-- Name: index_materialized_permissions_target_is_not_user; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_materialized_permissions_target_is_not_user ON public.materialized_permissions USING btree (target_uuid, traverse_owned, ((((user_uuid)::text = (target_uuid)::text) OR ((target_uuid)::text !~~ '_____-tpzed-_______________'::text))));
+
+
 --
 -- Name: index_nodes_on_created_at; Type: INDEX; Schema: public; Owner: -
 --
@@ -3308,6 +3315,5 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20230503224107'),
 ('20230815160000'),
 ('20230821000000'),
-('20230922000000');
-
-
+('20230922000000'),
+('20231013000000');
index b7e5476404869f6a89603302eafe828397acd1c5..5c8072b48a5594b18200725feb0ab5515b72c963 100644 (file)
@@ -100,44 +100,41 @@ def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil
     # tested this on Postgres 9.6, so in the future we should reevaluate
     # the performance & query plan on Postgres 12.
     #
+    # Update: as of 2023-10-13, incorrect merge join behavior is still
+    # observed on at least one major user installation that is using
+    # Postgres 14, so it seems this workaround is still needed.
+    #
     # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f
     #
     # Disable merge join for just this query (also local for this transaction), then reenable it.
     ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;"
 
-    temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}"
-    ActiveRecord::Base.connection.exec_query %{
-create temporary table #{temptable_perms} on commit drop
-as select * from compute_permission_subgraph($1, $2, $3, $4)
-},
-                                             'update_permissions.select',
-                                             [[nil, perm_origin_uuid],
-                                              [nil, starting_uuid],
-                                              [nil, perm_level],
-                                              [nil, edge_id]]
-
-    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to true;"
-
-    # Now that we have recomputed a set of permissions, delete any
-    # rows from the materialized_permissions table where (target_uuid,
-    # user_uuid) is not present or has perm_level=0 in the recomputed
-    # set.
-    ActiveRecord::Base.connection.exec_delete %{
-delete from #{PERMISSION_VIEW} where
-  target_uuid in (select target_uuid from #{temptable_perms}) and
-  not exists (select 1 from #{temptable_perms}
-              where target_uuid=#{PERMISSION_VIEW}.target_uuid and
-                    user_uuid=#{PERMISSION_VIEW}.user_uuid and
-                    val>0)
-},
-                                              "update_permissions.delete"
-
-    # Now insert-or-update permissions in the recomputed set.  The
-    # WHERE clause is important to avoid redundantly updating rows
-    # that haven't actually changed.
     ActiveRecord::Base.connection.exec_query %{
+with temptable_perms as (
+  select * from compute_permission_subgraph($1, $2, $3, $4)),
+
+/*
+    Now that we have recomputed a set of permissions, delete any
+    rows from the materialized_permissions table where (target_uuid,
+    user_uuid) is not present or has perm_level=0 in the recomputed
+    set.
+*/
+delete_rows as (
+  delete from #{PERMISSION_VIEW} where
+    target_uuid in (select target_uuid from temptable_perms) and
+    not exists (select 1 from temptable_perms
+                where target_uuid=#{PERMISSION_VIEW}.target_uuid and
+                      user_uuid=#{PERMISSION_VIEW}.user_uuid and
+                      val>0)
+)
+
+/*
+  Now insert-or-update permissions in the recomputed set.  The
+  WHERE clause is important to avoid redundantly updating rows
+  that haven't actually changed.
+*/
 insert into #{PERMISSION_VIEW} (user_uuid, target_uuid, perm_level, traverse_owned)
-  select user_uuid, target_uuid, val as perm_level, traverse_owned from #{temptable_perms} where val>0
+  select user_uuid, target_uuid, val as perm_level, traverse_owned from temptable_perms where val>0
 on conflict (user_uuid, target_uuid) do update
 set perm_level=EXCLUDED.perm_level, traverse_owned=EXCLUDED.traverse_owned
 where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and
@@ -145,7 +142,11 @@ where #{PERMISSION_VIEW}.user_uuid=EXCLUDED.user_uuid and
        (#{PERMISSION_VIEW}.perm_level != EXCLUDED.perm_level or
         #{PERMISSION_VIEW}.traverse_owned != EXCLUDED.traverse_owned);
 },
-                                             "update_permissions.insert"
+                                             'update_permissions.select',
+                                             [[nil, perm_origin_uuid],
+                                              [nil, starting_uuid],
+                                              [nil, perm_level],
+                                              [nil, edge_id]]
 
     if perm_level>0
       check_permissions_against_full_refresh