class PermissionTable < ActiveRecord::Migration[5.0]
def up
+ # This is a major migration. We are replacing the
+ # materialized_permission_view, which is fully recomputed any time
+ # a permission changes (and becomes very expensive as the number
+ # of users/groups becomes large), with a new strategy that only
+ # recomputes permissions for the subset of objects that are
+ # potentially affected by the addition or removal of a permission
+ # relationship (i.e. ownership or a permission link).
+ #
+ # This also disentangles the concept of "trashed groups" from the
+ # permissions system. Updating trashed items follows a similar
+ # (but less complicated) strategy to updating permissions, so it
+ # may be helpful to look at that first.
+ #
+
ActiveRecord::Base.connection.execute "DROP MATERIALIZED VIEW IF EXISTS materialized_permission_view;"
drop_table :permission_refresh_lock
- create_table :materialized_permissions, :id => false do |t|
- t.string :user_uuid
- t.string :target_uuid
- t.integer :perm_level
- t.boolean :traverse_owned
+ # This table stores the set of trashed groups and their trash_at
+ # time. Used to exclude trashed projects and their contents when
+ # getting object listings.
+ create_table :trashed_groups, :id => false do |t|
+ t.string :group_uuid
+ t.datetime :trash_at
end
- add_index :materialized_permissions, [:user_uuid, :target_uuid], unique: true, name: 'permission_user_target'
- add_index :materialized_permissions, [:target_uuid], unique: false, name: 'permission_target'
+ add_index :trashed_groups, :group_uuid, :unique => true
+ #
+ # Starting from a project, recursively traverse all the projects
+ # underneath it and return a set of project uuids and trash_at
+ # times (may be null). The initial trash_at can be a timestamp or
+ # null. The trash_at time propagates downward to groups it owns,
+ # i.e. when a group is trashed, everything underneath it in the
+ # ownership hierarchy is also considered trashed. However, this
+ # fact is recorded in the trashed_groups table, not by updating
+ # the trash_at field in the groups table.
+ #
ActiveRecord::Base.connection.execute %{
create or replace function project_subtree_with_trash_at (starting_uuid varchar(27), starting_trash_at timestamp)
returns table (target_uuid varchar(27), trash_at timestamp)
$$;
}
- create_table :trashed_groups, :id => false do |t|
- t.string :group_uuid
- t.datetime :trash_at
- end
- add_index :trashed_groups, :group_uuid, :unique => true
-
+ # Helper function to populate trashed_groups table. This starts
+ # with each group owned by a user and computes the subtree under
+ # that group to find any groups that are trashed.
ActiveRecord::Base.connection.execute %{
create or replace function compute_trashed ()
returns table (uuid varchar(27), trash_at timestamp)
$$;
}
+ # Now populate the table. For a non-test database this is the only
+ # time this ever happens, after this the trash table is updated
+ # incrementally. See app/models/group.rb#update_trash
ActiveRecord::Base.connection.execute("INSERT INTO trashed_groups select * from compute_trashed()")
+
+ # The table to store the flattened permissions. This is almost
+ # exactly the same as the old materialized_permission_view except
+ # that the target_owner_uuid column in the view is now just a
+ # boolean traverse_owned (the column was only ever tested for null
+ # or non-null).
+ #
+ # For details on how this table is used to apply permissions to
+ # queries, see app/models/arvados_model.rb#readable_by
+ #
+ create_table :materialized_permissions, :id => false do |t|
+ t.string :user_uuid
+ t.string :target_uuid
+ t.integer :perm_level
+ t.boolean :traverse_owned
+ end
+ add_index :materialized_permissions, [:user_uuid, :target_uuid], unique: true, name: 'permission_user_target'
+ add_index :materialized_permissions, [:target_uuid], unique: false, name: 'permission_target'
+
+ # Helper function. Determines if permission on an object implies
+ # transitive permission to things the object owns. This is always
+ # true for groups, but only true for users when the permission
+ # level is can_manage.
ActiveRecord::Base.connection.execute %{
create or replace function should_traverse_owned (starting_uuid varchar(27),
starting_perm integer)
returns bool
-STABLE
+IMMUTABLE
language SQL
as $$
select starting_uuid like '_____-j7d0g-_______________' or
$$;
}
+ # Merge all permission relationships into a single view. This
+ # consists of: groups (projects) owning things, users owning
+ # things, and explicit permission links.
+ #
+ # Fun fact, a SQL view gets inlined into the query where it is
+ # used, this enables the query planner to inject constraints, so
+ # when using the view we only look up edges we plan to traverse
+ # and avoid a brute force computation of all edges.
ActiveRecord::Base.connection.execute %{
create view permission_graph_edges as
select groups.owner_uuid as tail_uuid, groups.uuid as head_uuid, (3) as val from groups
where links.link_class='permission'
}
- # Get a set of permission by searching the graph and following
- # ownership and permission links.
- #
- # edges() - a subselect with the union of ownership and permission links
- #
- # traverse_graph() - recursive query, from the starting node,
- # self-join with edges to find outgoing permissions.
- # Re-runs the query on new rows until there are no more results.
- # This accomplishes a breadth-first search of the permission graph.
- #
+ # From starting_uuid, perform a recursive self-join on the edges
+ # to follow chains of permissions. This is a breadth-first search
+ # of the permission graph. Permission is propagated across edges,
+ # which may narrow the permission for subsequent links (eg I start
+ # at can_manage but when traversing a can_read link everything
+ # touched through that link will only be can_read).
+ #
+ # Yields the set of objects that are potentially affected, and
+ # their permission levels granted by having starting_perm on
+ # starting_uuid.
+ #
+ # If starting_uuid is a user, this computes the entire set of
+ # permissions for that user (because it returns everything that is
+ # reachable by that user).
+ #
+ # Used by compute_permission_subgraph below.
ActiveRecord::Base.connection.execute %{
create or replace function search_permission_graph (starting_uuid varchar(27),
starting_perm integer)
$$;
}
+ # This is the key function.
+ #
+ # perm_origin_uuid: The object that 'gets' or 'has' the permission.
+ #
+ # starting_uuid: The starting object the permission applies to.
+ #
+ # starting_perm: The permission that perm_origin_uuid 'has' on starting_uuid
+ # One of 1, 2, 3 for can_read, can_write, can_manage
+ # respectively, or 0 to revoke permissions.
+ #
+ # This function is broken up into a number of phases.
+ #
+ # 1. perm_from_start: Gets the initial set of objects potentially
+ # affected by the permission change, using
+ # search_permission_graph.
+ #
+ # 2. additional_perms: Finds other inbound edges that grant
+ # permissions on the objects in perm_from_start, and computes
+ # permissions that originate from those. This is required to
+ # handle the case where there is more than one path through which
+ # a user gets permission to an object. For example, a user owns a
+ # project and also shares it can_read with a group the user
+ # belongs to, adding the can_read link must not overwrite the
+ # existing can_manage permission granted by ownership.
+ #
+ # 3. partial_perms: Combine the permissions computed in the first two phases.
+ #
+ # 4. user_identity_perms: If there are any users in the set of
+ # potentially affected objects and the user's owner was not
+ # traversed, recompute permissions for that user. This is
+ # required because users always have permission to themselves
+ # (identity property) which would be missing from the permission
+ # set if the user was traversed while computing permissions for
+ # another object.
+ #
+ # 5. all_perms: Combines perm_from_start, additional_perms, and user_identity_perms.
+ #
+ # 6. The actual query that produces rows to be added or removed
+ # from the materialized_permissions table. This is the clever
+ # bit.
+ #
+ # Key insight: because permissions are transitive (unless
+ # traverse_owned is false), by knowing the permissions granted
+ # from all the "origins" (perm_origin_uuid, tail_uuid of links
+ # where head_uuid is in our potentially affected set, etc) we can
+ # join with the materialized_permissions table to get user
+ # permissions on those origins, and apply that to the whole graph
+ # of objects reached through that origin.
+ #
ActiveRecord::Base.connection.execute %{
create or replace function compute_permission_subgraph (perm_origin_uuid varchar(27),
starting_uuid varchar(27),
language SQL
as $$
with
-perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
- select perm_origin_uuid, target_uuid, val, traverse_owned
- from search_permission_graph(starting_uuid, starting_perm)),
+ perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+ select perm_origin_uuid, target_uuid, val, traverse_owned
+ from search_permission_graph(starting_uuid, starting_perm)),
additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
select edges.tail_uuid as perm_origin_uuid, ps.target_uuid, ps.val,
end
# Rebuild the trashed-groups table from scratch.
#
# Locks the table named by TRASHED_GROUPS, deletes every row in it, and
# repopulates it from the compute_trashed() SQL function.
#
# The three statements run inside a single transaction: PostgreSQL
# rejects LOCK TABLE outside a transaction block, and the lock has to
# be held across the DELETE and the INSERT so the rebuild is applied
# atomically. If the caller has already opened a transaction,
# ActiveRecord joins it rather than starting a new one.
#
# Returns the result of the final INSERT statement.
def refresh_trashed
  ActiveRecord::Base.transaction do
    conn = ActiveRecord::Base.connection
    conn.execute("LOCK TABLE #{TRASHED_GROUPS}")
    conn.execute("DELETE FROM #{TRASHED_GROUPS}")
    conn.execute("INSERT INTO #{TRASHED_GROUPS} select * from compute_trashed()")
  end
end
def update_permissions perm_origin_uuid, starting_uuid, perm_level, check=false
- # Update a subset of the permission graph
- # perm_level is the inherited permission
+ #
+ # Update a subset of the permission table affected by adding or
+ # removing a particular permission relationship (ownership or a
+ # permission link).
+ #
+ # perm_origin_uuid: This is the object that 'gets' the permission.
+ # It is the owner_uuid or tail_uuid.
+ #
+ # starting_uuid: The object we are computing permission for (or head_uuid)
+ #
+ # perm_level: The level of permission that perm_origin_uuid gets for starting_uuid.
+ #
# perm_level is a number from 0-3
# can_read=1
# can_write=2
# can_manage=3
- # call with perm_level=0 to revoke permissions
+ # or call with perm_level=0 to revoke permissions
#
- # 1. Compute set (group, permission) implied by traversing
- # graph starting at this group
- # 2. Find links from outside the graph that point inside
- # 3. For each starting uuid, get the set of permissions from the
- # materialized permission table
- # 3. Delete permissions from table not in our computed subset.
- # 4. Upsert each permission in our subset (user, group, val)
+ # check: for testing/debugging only, compare the result of the
+ # incremental update against a full table recompute. Throws an
+ # error if the contents are not identical (ie they produce different
+ # permission results)
+ # Theory of operation
+ #
+ # Given a change in a specific permission relationship, we recompute
+ # the set of permissions (for all users) that could possibly be
+ # affected by that relationship. For example, if a project is
+ # shared with another user, we recompute all permissions for all
+ # projects in the hierarchy. This returns a set of updated
+ # permissions, which we stash in a temporary table.
+ #
+ # Then, for each user_uuid/target_uuid in the updated permissions
+ # result set we insert/update a permission row in
+ # materialized_permissions, and delete any rows that exist in
+ # materialized_permissions that are not in the result set or have
+ # perm_level=0.
+ #
+ # see db/migrate/20200501150153_permission_table.rb for details on
+ # how the permissions are computed.
+
+ # "Conflicts with the ROW EXCLUSIVE, SHARE UPDATE EXCLUSIVE, SHARE
+ # ROW EXCLUSIVE, EXCLUSIVE, and ACCESS EXCLUSIVE lock modes. This
+ # mode protects a table against concurrent data changes."
ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in SHARE MODE"
+ # Workaround for
+ # BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE
+ # https://www.postgresql.org/message-id/152395805004.19366.3107109716821067806@wrigleys.postgresql.org
+ #
+ # For a crucial join in the compute_permission_subgraph() query, the
+ # planner mis-estimates the number of rows in a Common Table
+ # Expression (CTE, this is a subquery in a WITH clause) and as a
+ # result it chooses the wrong join order. The join starts with the
+ # permissions table because it mistakenly thinks
+ # count(materialized_permissions) < count(new computed permissions)
+ # when actually it is the other way around.
+ #
+ # Because of the incorrect join order, it chooses the wrong join
+ # strategy (merge join, which works best when two tables are roughly
+ # the same size). As a workaround, we can tell it not to use that
+ # join strategy, this causes it to pick hash join instead, which
+ # turns out to be a bit better. However, because the join order is
+ # still wrong, we don't get the full benefit of the index.
+ #
+ # This is very unfortunate because it makes the query performance
+ # dependent on the size of the materialized_permissions table, when
+ # the goal of this design was to make permission updates scale-free
+ # and only depend on the number of permissions affected and not the
+ # total table size. In several hours of researching I wasn't able
+ # to find a way to force the correct join order, so I'm calling it
+ # here and I have to move on.
+ #
+ # This is apparently addressed in Postgres 12, but I developed &
+ # tested this on Postgres 9.6, so in the future we should reevaluate
+ # the performance & query plan on Postgres 12.
+ #
+ # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f
+ #
+ # Disable merge join for just this query (also local for this transaction), then reenable it.
ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;"
temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}"