--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import argparse
+import arvados
+import csv
+import logging
+import os
+import sys
+
+from apiclient import errors as apiclient_errors
+from arvados._version import __version__
+
+import arvados.commands._util as arv_cmd
+
# Shared API client handle. main() only builds a client when this is
# still None, so a value assigned here beforehand is used as-is.
api_client = None

# Name of the 'tag' link that marks a group as managed by this tool.
GROUP_TAG = 'remote_group'

# Options specific to this command; used as a parent of arg_parser below.
opts = argparse.ArgumentParser(add_help=False)

opts.add_argument(
    '--version',
    action='version',
    version="{} {}".format(sys.argv[0], __version__),
    help='Print version and exit.')
opts.add_argument(
    '--verbose',
    action='store_true',
    default=False,
    help="\nLog informational messages. By default is deactivated.\n")
opts.add_argument(
    'path',
    metavar='PATH',
    type=str,
    help="\nLocal file path containing a CSV-like format.\n")

# --user-email and --user-name both write to args.user_email and cannot be
# combined: True selects email addresses, False selects user names.
_user_id = opts.add_mutually_exclusive_group()
_user_id.add_argument(
    '--user-email',
    action='store_true',
    default=True,
    help=("\nIdentify users by their email addresses instead of user names."
          "\nThis is the default.\n"))
_user_id.add_argument(
    '--user-name',
    action='store_false',
    dest='user_email',
    help="\nIdentify users by their name instead of email addresses.\n")

# Full command-line parser: the options above plus the shared retry option.
arg_parser = argparse.ArgumentParser(
    description='Synchronize group memberships from a CSV file.',
    parents=[opts, arv_cmd.retry_opt])
+
def parse_arguments(arguments):
    """Parse *arguments* and validate the input file path.

    The path is rejected through ``arg_parser.error()`` when it is empty,
    does not exist, or is not a file. Otherwise the parsed namespace is
    returned.
    """
    args = arg_parser.parse_args(arguments)
    path = args.path

    # Pick the first applicable complaint, then report it in one place.
    problem = None
    if path is None or path == '':
        problem = "Please provide a path to an input file."
    elif not os.path.exists(path):
        problem = "File not found: '%s'" % path
    elif not os.path.isfile(path):
        problem = "Path provided is not a file: '%s'" % path

    if problem is not None:
        arg_parser.error(problem)
    return args
+
def _open_csv(path):
    """Open *path* in the mode the csv module expects on this Python."""
    if sys.version_info[0] < 3:
        # Python 2's csv module reads from binary-mode files.
        return open(path, 'rb')
    # Python 3's csv module needs text mode with newline translation off;
    # feeding it bytes raises csv.Error on the first row.
    return open(path, 'r', newline='')


def _load_remote_groups(api, retries, all_users, user_id):
    """Fetch every group tagged GROUP_TAG together with its members.

    Returns a ``(remote_groups, group_name_to_uuid)`` pair.
    ``remote_groups`` maps a group UUID to a dict holding the group
    'object', the 'previous_members' found on the server (as email or
    username values, depending on *user_id*) and an empty
    'current_members' set to be filled from the input file.
    """
    tagged_uuids = set()
    for link in arvados.util.list_all(
            api.links().list,
            retries,
            filters=[['link_class', '=', 'tag'],
                     ['name', '=', GROUP_TAG],
                     ['head_kind', '=', 'arvados#group']]):
        tagged_uuids.add(link['head_uuid'])

    remote_groups = {}
    group_name_to_uuid = {}  # Index by group name
    for group in arvados.util.list_all(
            api.groups().list,
            retries,
            filters=[['uuid', 'in', list(tagged_uuids)]]):
        member_links = arvados.util.list_all(
            api.links().list,
            retries,
            filters=[['link_class', '=', 'permission'],
                     ['name', '=', 'can_read'],
                     ['tail_uuid', '=', group['uuid']],
                     ['head_kind', '=', 'arvados#user']])
        # Members are tracked by the same id (email/username) that the
        # input file uses, not by UUID.
        members = set(all_users[link['head_uuid']][user_id]
                      for link in member_links)
        remote_groups[group['uuid']] = {'object': group,
                                        'previous_members': members,
                                        'current_members': set()}
        # FIXME: There's an index (group_name, group.owner_uuid), should we
        # ask for our own groups tagged as remote? (with own being 'system'?)
        group_name_to_uuid[group['name']] = group['uuid']
    return remote_groups, group_name_to_uuid


def _sync_csv_memberships(api, retries, path, user_id, userid_to_uuid,
                          remote_groups, group_name_to_uuid, logger):
    """Create the groups and memberships listed in the CSV file at *path*.

    Each usable row is a ``group, user`` pair. Blank rows are ignored, and
    rows with any other number of fields are logged and skipped without
    stopping the run. *remote_groups* and *group_name_to_uuid* are updated
    in place.

    Returns ``(groups_created, members_added, complete)``. ``complete`` is
    False when the csv reader itself failed, meaning the rest of the file
    was not processed.
    """
    groups_created = 0
    members_added = 0
    complete = True
    with _open_csv(path) as f:
        reader = csv.reader(f)
        try:
            for row in reader:
                if not row:
                    # Blank line: nothing to sync.
                    continue
                if len(row) != 2:
                    # Skip only this row; aborting here would make every
                    # member listed further down look absent.
                    logger.warning(
                        'Error on line %d: expected 2 fields (group, user), '
                        'found %d', reader.line_num, len(row))
                    continue
                group = row[0].strip()
                user = row[1].strip()
                if user not in userid_to_uuid:
                    # User not present on the system, skip.
                    logger.warning(
                        "There's no user with %s '%s' on the system"
                        ", skipping.", user_id, user)
                    continue
                if group not in group_name_to_uuid:
                    # Group doesn't exist, create and tag it before
                    # continuing.
                    g = api.groups().create(body={
                        'name': group}).execute(num_retries=retries)
                    api.links().create(body={
                        'link_class': 'tag',
                        'name': GROUP_TAG,
                        'head_uuid': g['uuid'],
                    }).execute(num_retries=retries)
                    # Update cached group data
                    group_name_to_uuid[g['name']] = g['uuid']
                    remote_groups[g['uuid']] = {'object': g,
                                                'previous_members': set(),
                                                'current_members': set()}
                    groups_created += 1
                # Both group & user exist, check if user is a member
                g_uuid = group_name_to_uuid[group]
                membership = remote_groups[g_uuid]
                already_member = (
                    user in membership['previous_members'] or
                    # Also covers a pair repeated within this same file,
                    # which must not create a second permission link.
                    user in membership['current_members'])
                if not already_member:
                    # User wasn't a member, but should.
                    api.links().create(body={
                        'link_class': 'permission',
                        'name': 'can_read',
                        'tail_uuid': g_uuid,
                        'head_uuid': userid_to_uuid[user],
                    }).execute(num_retries=retries)
                    members_added += 1
                membership['current_members'].add(user)
        except csv.Error as e:
            logger.warning('Error on line %d: %s', reader.line_num, e)
            complete = False
    return groups_created, members_added, complete


def _evict_stale_members(api, retries, remote_groups, userid_to_uuid, logger):
    """Delete 'can_read' links of previous members missing from this run.

    Returns the number of users removed from groups.
    """
    members_removed = 0
    for group_uuid, membership in remote_groups.items():
        evicted = (membership['previous_members'] -
                   membership['current_members'])
        if not evicted:
            continue
        logger.info("Removing %d users from group '%s'",
                    len(evicted), membership['object']['name'])
        for evicted_user in evicted:
            links = arvados.util.list_all(
                api.links().list,
                retries,
                filters=[['link_class', '=', 'permission'],
                         ['name', '=', 'can_read'],
                         ['tail_uuid', '=', group_uuid],
                         ['head_uuid', '=', userid_to_uuid[evicted_user]]])
            for l in links:
                api.links().delete(
                    uuid=l['uuid']).execute(num_retries=retries)
            members_removed += 1
    return members_removed


def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
    """Synchronize remote group memberships with a CSV file.

    The input file lists ``group, user`` pairs. Groups that are missing
    are created and tagged with GROUP_TAG, missing memberships are added
    as 'can_read' permission links, and previous members of tagged groups
    that are not listed in the file are removed.

    Removals are skipped when the csv reader fails part-way through the
    file, because an incomplete listing would evict legitimate members.

    arguments -- command-line argument list handed to parse_arguments().
    stdout, stderr -- accepted for interface compatibility; not used here.
    """
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_sync_groups')

    if api_client is None:
        api_client = arvados.api('v1')

    # How are users going to be identified on the input file?
    user_id = 'email' if args.user_email else 'username'

    if args.verbose:
        logger.setLevel(logging.INFO)

    logger.info("Group sync starting. Using '%s' as users id", user_id)

    # Get the complete user list to minimize API Server requests
    all_users = {}
    userid_to_uuid = {}  # Index by user_id (email/username)
    for u in arvados.util.list_all(api_client.users().list, args.retries):
        all_users[u['uuid']] = u
        userid_to_uuid[u[user_id]] = u['uuid']
    logger.info('Found %d users', len(all_users))

    remote_groups, group_name_to_uuid = _load_remote_groups(
        api_client, args.retries, all_users, user_id)
    logger.info('Found %d remote groups', len(remote_groups))

    groups_created, members_added, complete = _sync_csv_memberships(
        api_client, args.retries, args.path, user_id, userid_to_uuid,
        remote_groups, group_name_to_uuid, logger)

    members_removed = 0
    if complete:
        # Remove previous members not listed on this run
        members_removed = _evict_stale_members(
            api_client, args.retries, remote_groups, userid_to_uuid, logger)
    else:
        logger.warning(
            "Input file could not be read completely, "
            "not removing any group members.")

    logger.info("Groups created: %d, members added: %s, members removed: %d",
                groups_created, members_added, members_removed)
+
# Run the synchronization only when executed as a script, not on import.
if __name__ == "__main__":
    main()