12018: Added arv-sync-groups command, to synchronize groups membership
author Lucas Di Pentima <ldipentima@veritasgenetics.com>
Wed, 27 Sep 2017 03:14:20 +0000 (00:14 -0300)
committer Lucas Di Pentima <ldipentima@veritasgenetics.com>
Wed, 27 Sep 2017 03:14:20 +0000 (00:14 -0300)
from a CSV file.

Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>

sdk/cli/bin/arv-sync-groups [new file with mode: 0755]
sdk/python/arvados/commands/sync_groups.py [new file with mode: 0644]

diff --git a/sdk/cli/bin/arv-sync-groups b/sdk/cli/bin/arv-sync-groups
new file mode 100755 (executable)
index 0000000..f744bff
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Thin executable wrapper: the command itself is implemented by main() in
+# the arvados.commands.sync_groups module.
+from arvados.commands.sync_groups import main
+main()
diff --git a/sdk/python/arvados/commands/sync_groups.py b/sdk/python/arvados/commands/sync_groups.py
new file mode 100644 (file)
index 0000000..2bc08e2
--- /dev/null
@@ -0,0 +1,189 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import argparse
+import arvados
+import csv
+import logging
+import os
+import sys
+
+from apiclient import errors as apiclient_errors
+from arvados._version import __version__
+
+import arvados.commands._util as arv_cmd
+
+api_client = None
+
+GROUP_TAG = 'remote_group'
+
+opts = argparse.ArgumentParser(add_help=False)
+
+opts.add_argument('--version', action='version',
+                    version="%s %s" % (sys.argv[0], __version__),
+                    help='Print version and exit.')
+opts.add_argument('--verbose', action='store_true', default=False,
+                  help="""
+Log informational messages. By default is deactivated.
+""")
+opts.add_argument('path', metavar='PATH', type=str, 
+                    help="""
+Local file path containing a CSV-like format.
+""")
+
+_user_id = opts.add_mutually_exclusive_group()
+_user_id.add_argument('--user-email', action='store_true', default=True,
+                       help="""
+Identify users by their email addresses instead of user names.
+This is the default.
+""")
+_user_id.add_argument('--user-name', action='store_false', dest='user_email',
+                      help="""
+Identify users by their name instead of email addresses.
+""")
+
+arg_parser = argparse.ArgumentParser(
+    description='Synchronize group memberships from a CSV file.',
+    parents=[opts, arv_cmd.retry_opt])
+
+def parse_arguments(arguments):
+    args = arg_parser.parse_args(arguments)
+    if args.path is None or args.path == '':
+        arg_parser.error("Please provide a path to an input file.")
+    elif not os.path.exists(args.path):
+        arg_parser.error("File not found: '%s'" % args.path)
+    elif not os.path.isfile(args.path):
+        arg_parser.error("Path provided is not a file: '%s'" % args.path)
+    return args
+
+def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
+    global api_client
+
+    args = parse_arguments(arguments)
+    logger = logging.getLogger('arvados.arv_sync_groups')
+
+    if api_client is None:
+        api_client = arvados.api('v1')
+
+    # How are users going to be identified on the input file?
+    if args.user_email:
+        user_id = 'email'
+    else:
+        user_id = 'username'
+    
+    if args.verbose:
+        logger.setLevel(logging.INFO)
+        
+    logger.info("Group sync starting. Using '%s' as users id" % user_id)
+    
+    # Get the complete user list to minimize API Server requests
+    all_users = {}
+    userid_to_uuid = {} # Index by user_id (email/username)
+    for u in arvados.util.list_all(api_client.users().list, args.retries):
+        all_users[u['uuid']] = u
+        userid_to_uuid[u[user_id]] = u['uuid']
+    logger.info('Found %d users' % len(all_users))
+
+    # Request all UUIDs for groups tagged as remote
+    remote_group_uuids = set()
+    for link in arvados.util.list_all(
+                            api_client.links().list, 
+                            args.retries,
+                            filters=[['link_class', '=', 'tag'],
+                                     ['name', '=', GROUP_TAG],
+                                     ['head_kind', '=', 'arvados#group']]):
+        remote_group_uuids.add(link['head_uuid'])
+    # Get remote groups and their members
+    remote_groups = {}
+    group_name_to_uuid = {} # Index by group name
+    for group in arvados.util.list_all(
+                            api_client.groups().list,
+                            args.retries,
+                            filters=[['uuid', 'in', list(remote_group_uuids)]]):
+        member_links = arvados.util.list_all(
+                            api_client.links().list,
+                            args.retries,
+                            filters=[['link_class', '=', 'permission'],
+                                      ['name', '=', 'can_read'],
+                                      ['tail_uuid', '=', group['uuid']],
+                                      ['head_kind', '=', 'arvados#user']])
+        # Build a list of user_ids (email/username) belonging to this group
+        members = set([all_users[link['head_uuid']][user_id] 
+                       for link in member_links])
+        remote_groups[group['uuid']] = {'object': group,
+                                        'previous_members': members,
+                                        'current_members': set()}
+        # FIXME: There's an index (group_name, group.owner_uuid), should we
+        # ask for our own groups tagged as remote? (with own being 'system'?)
+        group_name_to_uuid[group['name']] = group['uuid']
+    logger.info('Found %d remote groups' % len(remote_groups))
+    
+    groups_created = 0
+    members_added = 0
+    members_removed = 0
+    with open(args.path, 'rb') as f:
+        reader = csv.reader(f)
+        try:
+            for group, user in reader:
+                group = group.strip()
+                user = user.strip()
+                if not user in userid_to_uuid:
+                    # User not present on the system, skip.
+                    logger.warning("There's no user with %s '%s' on the system"
+                                   ", skipping." % (user_id, user))
+                    continue
+                if not group in group_name_to_uuid:
+                    # Group doesn't exist, create and tag it before continuing
+                    g = api_client.groups().create(body={
+                        'name': group}).execute(num_retries=args.retries)
+                    api_client.links().create(body={
+                        'link_class': 'tag',
+                        'name': GROUP_TAG,
+                        'head_uuid': g['uuid'],
+                    }).execute(num_retries=args.retries)
+                    # Update cached group data
+                    group_name_to_uuid[g['name']] = g['uuid']
+                    remote_groups[g['uuid']] = {'object': g,
+                                                'previous_members': set(),
+                                                'current_members': set()}
+                    groups_created += 1
+                # Both group & user exist, check if user is a member
+                g_uuid = group_name_to_uuid[group]
+                if not user in remote_groups[g_uuid]['previous_members']:
+                    # User wasn't a member, but should.
+                    api_client.links().create(body={
+                        'link_class': 'permission',
+                        'name': 'can_read',
+                        'tail_uuid': g_uuid,
+                        'head_uuid': userid_to_uuid[user],
+                    }).execute(num_retries=args.retries)
+                    members_added += 1
+                remote_groups[g_uuid]['current_members'].add(user)
+        except (ValueError, csv.Error) as e:
+            logger.warning('Error on line %d: %s' % (reader.line_num, e))
+    # Remove previous members not listed on this run
+    for group_uuid in remote_groups:
+        previous = remote_groups[group_uuid]['previous_members']
+        current = remote_groups[group_uuid]['current_members']
+        evicted = previous - current
+        if len(evicted) > 0:
+            logger.info("Removing %d users from group '%s'" % (
+                len(evicted), remote_groups[group_uuid]['object']['name']))
+        for evicted_user in evicted:
+            links = arvados.util.list_all(
+                api_client.links().list,
+                args.retries,
+                filters=[['link_class', '=', 'permission'],
+                         ['name', '=', 'can_read'],
+                         ['tail_uuid', '=', group_uuid],
+                         ['head_uuid', '=', userid_to_uuid[evicted_user]]])
+            for l in links:
+                api_client.links().delete(
+                    uuid=l['uuid']).execute(num_retries=args.retries)
+            members_removed += 1
+    logger.info("Groups created: %d, members added: %s, members removed: %d" % \
+                (groups_created, members_added, members_removed))
+
+# Run the command when this module is executed directly as a script.
+if __name__ == '__main__':
+    main()