1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
12 from apiclient import errors as apiclient_errors
13 from arvados._version import __version__
15 import arvados.commands._util as arv_cmd
# Name of the 'tag' links that identify the groups this tool synchronizes.
GROUP_TAG = 'remote_group'

# Options are declared on a help-less parent parser so that the final parser
# (which also inherits the retry options) owns the single -h/--help flag.
opts = argparse.ArgumentParser(add_help=False)

opts.add_argument(
    '--version', action='version',
    version="%s %s" % (sys.argv[0], __version__),
    help='Print version and exit.')
opts.add_argument(
    '--verbose', action='store_true', default=False,
    help='Log informational messages. By default is deactivated.')
opts.add_argument(
    'path', metavar='PATH', type=str,
    help='Local file path containing a CSV-like format.')

# --user-email and --user-name write to the same destination (user_email),
# so at most one of them may be given; email identification is the default.
_user_id = opts.add_mutually_exclusive_group()
_user_id.add_argument(
    '--user-email', action='store_true', default=True,
    help='Identify users by their email addresses instead of user names.')
_user_id.add_argument(
    '--user-name', action='store_false', dest='user_email',
    help='Identify users by their name instead of email addresses.')

# Parser actually used by parse_arguments().
arg_parser = argparse.ArgumentParser(
    description='Synchronize group memberships from a CSV file.',
    parents=[opts, arv_cmd.retry_opt])
def parse_arguments(arguments):
    """Parse the command line and validate the input file path.

    Arguments:
    * arguments -- list of argument strings passed to
      ``arg_parser.parse_args``; ``None`` lets argparse read ``sys.argv``.

    Returns the parsed ``argparse.Namespace``.

    A missing, nonexistent or non-regular-file path is reported through
    ``arg_parser.error``, which prints the usage message and exits.
    """
    args = arg_parser.parse_args(arguments)
    if not args.path:
        # Covers both None and the empty string.
        arg_parser.error("Please provide a path to an input file.")
    elif not os.path.exists(args.path):
        arg_parser.error("File not found: '%s'" % args.path)
    elif not os.path.isfile(args.path):
        arg_parser.error("Path provided is not a file: '%s'" % args.path)
    # main() consumes the namespace, so it has to be handed back.
    return args
def _open_csv(path):
    """Open *path* the way the running interpreter's csv module requires."""
    if sys.version_info[0] < 3:
        # Python 2's csv module works on byte strings.
        return open(path, 'rb')
    # Python 3's csv module needs text, with newline translation disabled.
    return open(path, 'r', newline='')


def _load_users(client, user_id, retries):
    """Return (users indexed by uuid, uuid indexed by email/username)."""
    all_users = {}
    userid_to_uuid = {}
    for u in arvados.util.list_all(client.users().list, retries):
        all_users[u['uuid']] = u
        userid_to_uuid[u[user_id]] = u['uuid']
    return all_users, userid_to_uuid


def _load_remote_groups(client, all_users, user_id, retries):
    """Return (remote groups indexed by uuid, group uuid indexed by name).

    Each remote group entry holds the group record ('object'), the members
    found on the server ('previous_members') and an empty set to be filled
    with the members listed on the input file ('current_members').
    """
    # Request all UUIDs for groups tagged as remote
    remote_group_uuids = set()
    for link in arvados.util.list_all(
            client.links().list, retries,
            filters=[['link_class', '=', 'tag'],
                     ['name', '=', GROUP_TAG],
                     ['head_kind', '=', 'arvados#group']]):
        remote_group_uuids.add(link['head_uuid'])

    # Get remote groups and their members
    remote_groups = {}
    group_name_to_uuid = {}  # Index by group name
    for group in arvados.util.list_all(
            client.groups().list, retries,
            filters=[['uuid', 'in', list(remote_group_uuids)]]):
        member_links = arvados.util.list_all(
            client.links().list, retries,
            filters=[['link_class', '=', 'permission'],
                     ['name', '=', 'can_read'],
                     ['tail_uuid', '=', group['uuid']],
                     ['head_kind', '=', 'arvados#user']])
        # Build the set of user_ids (email/username) belonging to this
        # group. Links whose user is absent from the user listing are
        # ignored instead of aborting the run with a KeyError.
        members = set(all_users[link['head_uuid']][user_id]
                      for link in member_links
                      if link['head_uuid'] in all_users)
        remote_groups[group['uuid']] = {'object': group,
                                        'previous_members': members,
                                        'current_members': set()}
        # FIXME: There's an index (group_name, group.owner_uuid), should we
        # ask for our own groups tagged as remote? (with own being 'system'?)
        group_name_to_uuid[group['name']] = group['uuid']
    return remote_groups, group_name_to_uuid


def _add_listed_members(client, reader, user_id, userid_to_uuid,
                        group_name_to_uuid, remote_groups, retries, logger):
    """Create the groups and memberships listed by *reader*.

    Returns (groups created, members added, input fully understood).
    The last value is False when any row was malformed or the reader
    failed, meaning 'current_members' may be incomplete.
    """
    groups_created = 0
    members_added = 0
    input_ok = True
    try:
        for row in reader:
            if not row:
                # Blank line: nothing to do, and nothing was lost.
                continue
            if len(row) != 2:
                # Skip only this row; the remaining ones are still valid.
                logger.warning(
                    'Error on line %d: expected 2 fields, found %d',
                    reader.line_num, len(row))
                input_ok = False
                continue
            group = row[0].strip()
            user = row[1].strip()
            if user not in userid_to_uuid:
                # User not present on the system, skip.
                logger.warning("There's no user with %s '%s' on the system"
                               ", skipping.", user_id, user)
                continue
            if group not in group_name_to_uuid:
                # Group doesn't exist, create and tag it before continuing
                g = client.groups().create(body={
                    'name': group}).execute(num_retries=retries)
                client.links().create(body={
                    'link_class': 'tag',
                    'name': GROUP_TAG,
                    'head_uuid': g['uuid'],
                }).execute(num_retries=retries)
                # Update cached group data
                group_name_to_uuid[g['name']] = g['uuid']
                remote_groups[g['uuid']] = {'object': g,
                                            'previous_members': set(),
                                            'current_members': set()}
                groups_created += 1
            # Both group & user exist, check if user is a member
            g_uuid = group_name_to_uuid[group]
            entry = remote_groups[g_uuid]
            if (user not in entry['previous_members'] and
                    user not in entry['current_members']):
                # User wasn't a member, but should.
                client.links().create(body={
                    'link_class': 'permission',
                    'name': 'can_read',
                    'tail_uuid': g_uuid,
                    'head_uuid': userid_to_uuid[user],
                }).execute(num_retries=retries)
                members_added += 1
            entry['current_members'].add(user)
    except csv.Error as e:
        # The reader itself failed, so the rest of the file is unreadable.
        logger.warning('Error on line %d: %s', reader.line_num, e)
        input_ok = False
    return groups_created, members_added, input_ok


def _remove_unlisted_members(client, remote_groups, userid_to_uuid,
                             retries, logger):
    """Delete memberships found on the server but not on the input file.

    Returns the number of members removed.
    """
    members_removed = 0
    for group_uuid, entry in remote_groups.items():
        evicted = entry['previous_members'] - entry['current_members']
        if not evicted:
            continue
        logger.info("Removing %d users from group '%s'",
                    len(evicted), entry['object']['name'])
        for evicted_user in evicted:
            links = arvados.util.list_all(
                client.links().list, retries,
                filters=[['link_class', '=', 'permission'],
                         ['name', '=', 'can_read'],
                         ['tail_uuid', '=', group_uuid],
                         ['head_uuid', '=', userid_to_uuid[evicted_user]]])
            for link in links:
                client.links().delete(
                    uuid=link['uuid']).execute(num_retries=retries)
            members_removed += 1
    return members_removed


def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
    """Synchronize remote group memberships with a CSV file.

    Every row of the file is a (group name, user id) pair. Missing groups
    are created and tagged, missing memberships are added, and memberships
    of tagged groups that the file no longer lists are removed.

    Arguments:
    * arguments -- command line argument list, see parse_arguments().
    * stdout, stderr -- kept for interface compatibility; currently unused.

    If part of the input file could not be understood, new memberships are
    still added but nothing is removed: an incomplete listing would
    otherwise evict members that appear on the unread rows.
    """
    global api_client

    args = parse_arguments(arguments)
    logger = logging.getLogger('arvados.arv_sync_groups')

    # A client may have been injected at module level; create one only when
    # none is there. globals().get() also covers the name being undefined.
    if globals().get('api_client') is None:
        api_client = arvados.api('v1')

    # How are users going to be identified on the input file?
    user_id = 'email' if args.user_email else 'username'

    if args.verbose:
        logger.setLevel(logging.INFO)

    logger.info("Group sync starting. Using '%s' as users id", user_id)

    # Get the complete user list to minimize API Server requests
    all_users, userid_to_uuid = _load_users(
        api_client, user_id, args.retries)
    logger.info('Found %d users', len(all_users))

    remote_groups, group_name_to_uuid = _load_remote_groups(
        api_client, all_users, user_id, args.retries)
    logger.info('Found %d remote groups', len(remote_groups))

    with _open_csv(args.path) as f:
        groups_created, members_added, input_ok = _add_listed_members(
            api_client, csv.reader(f), user_id, userid_to_uuid,
            group_name_to_uuid, remote_groups, args.retries, logger)

    # Remove previous members not listed on this run
    members_removed = 0
    if input_ok:
        members_removed = _remove_unlisted_members(
            api_client, remote_groups, userid_to_uuid, args.retries, logger)
    else:
        logger.warning("Input file had errors, not removing any members.")

    logger.info(
        "Groups created: %d, members added: %s, members removed: %d",
        groups_created, members_added, members_removed)
189 if __name__ == '__main__':