X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/de3276d1b2067be4aa40b51605c2ce509ea80915..97e40209600c094eac15085627e49da52ab0f517:/sdk/python/arvados/commands/federation_migrate.py diff --git a/sdk/python/arvados/commands/federation_migrate.py b/sdk/python/arvados/commands/federation_migrate.py index a7d9414d9d..770e1609db 100755 --- a/sdk/python/arvados/commands/federation_migrate.py +++ b/sdk/python/arvados/commands/federation_migrate.py @@ -3,6 +3,15 @@ # # SPDX-License-Identifier: Apache-2.0 +# +# Migration tool for merging user accounts belonging to the same user +# but on separate clusters to use a single user account managed by a +# specific cluster. +# +# If you're working on this, see +# arvados/sdk/python/tests/fed-migrate/README for information about +# the testing infrastructure. + import arvados import arvados.util import arvados.errors @@ -12,7 +21,10 @@ import argparse import hmac import urllib.parse import os +import hashlib +import re from arvados._version import __version__ +from . import _util as arv_cmd EMAIL=0 USERNAME=1 @@ -32,10 +44,10 @@ def connect_clusters(args): host = r[0] token = r[1] print("Contacting %s" % (host)) - arv = arvados.api(host=host, token=token, cache=False) + arv = arvados.api(host=host, token=token, cache=False, num_retries=args.retries) clusters[arv._rootDesc["uuidPrefix"]] = arv else: - arv = arvados.api(cache=False) + arv = arvados.api(cache=False, num_retries=args.retries) rh = arv._rootDesc["remoteHosts"] tok = arv.api_client_authorizations().current().execute() token = "v2/%s/%s" % (tok["uuid"], tok["api_token"]) @@ -56,8 +68,8 @@ def connect_clusters(args): errors.append("Inconsistent login cluster configuration, expected '%s' on %s but was '%s'" % (loginCluster, config["ClusterID"], config["Login"]["LoginCluster"])) continue - if arv._rootDesc["revision"] < "20190926": - errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 1.5 before running migration." % config["ClusterID"]) + if arv._rootDesc["revision"] < "20200331": + errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 2.0.2 before running migration." % config["ClusterID"]) continue try: @@ -67,7 +79,7 @@ def connect_clusters(args): continue if not cur["is_admin"]: - errors.append("Not admin of %s" % host) + errors.append("User %s is not admin on %s" % (cur["uuid"], arv._rootDesc["uuidPrefix"])) continue for r in clusters: @@ -85,13 +97,12 @@ def fetch_users(clusters, loginCluster): by_email = {} by_username = {} - users = [] - for c, arv in clusters.items(): - print("Getting user list from %s" % c) - ul = arvados.util.list_all(arv.users().list) - for l in ul: - if l["uuid"].startswith(c): - users.append(l) + users = [ + user + for prefix, arv in clusters.items() + for user in arvados.util.keyset_list_all(arv.users().list, bypass_federation=True) + if user['uuid'].startswith(prefix) + ] # Users list is sorted by email # Go through users and collect users with same email @@ -99,7 +110,7 @@ def fetch_users(clusters, loginCluster): # call add_accum_rows() to generate the report rows with # the "home cluster" set, and also fill in the by_email table. - users = sorted(users, key=lambda u: u["email"]+"::"+(u["username"] or "")+"::"+u["uuid"]) + users.sort(key=lambda u: (u["email"], u["username"] or "", u["uuid"])) accum = [] lastemail = None @@ -159,14 +170,20 @@ def read_migrations(args, by_email, by_username): def update_username(args, email, user_uuid, username, migratecluster, migratearv): print("(%s) Updating username of %s to '%s' on %s" % (email, user_uuid, username, migratecluster)) - if not args.dry_run: - try: - conflicts = migratearv.users().list(filters=[["username", "=", username]]).execute() - if conflicts["items"]: - migratearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute() - migratearv.users().update(uuid=user_uuid, body={"user": {"username": username}}).execute() - except arvados.errors.ApiError as e: - print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e)) + if args.dry_run: + return + try: + conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute() + if conflicts["items"]: + # There's already a user with the username, move the old user out of the way + migratearv.users().update(uuid=conflicts["items"][0]["uuid"], + bypass_federation=True, + body={"user": {"username": username+"migrate"}}).execute() + migratearv.users().update(uuid=user_uuid, + bypass_federation=True, + body={"user": {"username": username}}).execute() + except arvados.errors.ApiError as e: + print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e)) def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters): @@ -187,14 +204,27 @@ def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, cl return None print("(%s) No user listed with same email to migrate %s to %s, will create new user with username '%s'" % (email, old_user_uuid, userhome, username)) if not args.dry_run: + oldhomecluster = old_user_uuid[0:5] + oldhomearv = clusters[oldhomecluster] newhomecluster = userhome[0:5] homearv = clusters[userhome] user = None try: - conflicts = homearv.users().list(filters=[["username", "=", username]]).execute() + olduser = oldhomearv.users().get(uuid=old_user_uuid).execute() + conflicts = homearv.users().list(filters=[["username", "=", username]], + bypass_federation=True).execute() if conflicts["items"]: - homearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute() - user = homearv.users().create(body={"user": {"email": email, "username": username}}).execute() + homearv.users().update( + uuid=conflicts["items"][0]["uuid"], + bypass_federation=True, + body={"user": {"username": username+"migrate"}}).execute() + user = homearv.users().create( + body={"user": { + "email": email, + "first_name": olduser["first_name"], + "last_name": olduser["last_name"], + "username": username, + "is_active": olduser["is_active"]}}).execute() except arvados.errors.ApiError as e: print("(%s) Could not create user: %s" % (email, str(e))) return None @@ -227,69 +257,89 @@ def activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_us print("(%s) Could not create API token for %s: %s" % (email, new_user_uuid, e)) return None + try: + findolduser = migratearv.users().list(filters=[["uuid", "=", old_user_uuid]], bypass_federation=True).execute() + if len(findolduser["items"]) == 0: + return False + if len(findolduser["items"]) == 1: + olduser = findolduser["items"][0] + else: + print("(%s) Unexpected result" % (email)) + return None + except arvados.errors.ApiError as e: + print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e)) + return None + salted = 'v2/' + newtok["uuid"] + '/' + hmac.new(newtok["api_token"].encode(), msg=migratecluster.encode(), - digestmod='sha1').hexdigest() + digestmod=hashlib.sha1).hexdigest() try: ru = urllib.parse.urlparse(migratearv._rootDesc["rootUrl"]) if not args.dry_run: - newuser = arvados.api(host=ru.netloc, token=salted, insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute() + newuser = arvados.api(host=ru.netloc, token=salted, + insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute() else: - newuser = {"is_active": True, "username": username} + newuser = {"is_active": True, "username": email.split('@')[0], "is_admin": False} except arvados.errors.ApiError as e: print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e)) return None - try: - olduser = migratearv.users().get(uuid=old_user_uuid).execute() - except arvados.errors.ApiError as e: - if e.resp.status != 404: - print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e)) - return None - - if not newuser["is_active"]: + if not newuser["is_active"] and olduser["is_active"]: print("(%s) Activating user %s on %s" % (email, new_user_uuid, migratecluster)) try: if not args.dry_run: - migratearv.users().update(uuid=new_user_uuid, body={"is_active": True}).execute() + migratearv.users().update(uuid=new_user_uuid, bypass_federation=True, + body={"is_active": True}).execute() except arvados.errors.ApiError as e: print("(%s) Could not activate user %s on %s: %s" % (email, new_user_uuid, migratecluster, e)) return None if olduser["is_admin"] and not newuser["is_admin"]: - print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster)) + print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s. Please ensure the user admin status is the same on both clusters. Note that a federated admin account has admin privileges on the entire federation." % (email, old_user_uuid, new_user_uuid, migratecluster)) return None return newuser def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid): + if args.dry_run: + return try: - if not args.dry_run: + new_owner_uuid = new_user_uuid + if args.data_into_subproject: grp = migratearv.groups().create(body={ "owner_uuid": new_user_uuid, "name": "Migrated from %s (%s)" % (email, old_user_uuid), "group_class": "project" }, ensure_unique_name=True).execute() - migratearv.users().merge(old_user_uuid=old_user_uuid, - new_user_uuid=new_user_uuid, - new_owner_uuid=grp["uuid"], - redirect_to_new_user=True).execute() + new_owner_uuid = grp["uuid"] + migratearv.users().merge(old_user_uuid=old_user_uuid, + new_user_uuid=new_user_uuid, + new_owner_uuid=new_owner_uuid, + redirect_to_new_user=True).execute() except arvados.errors.ApiError as e: - print("(%s) Error migrating user: %s" % (email, e)) + name_collision = re.search(r'Key \(owner_uuid, name\)=\((.*?), (.*?)\) already exists\.\n.*UPDATE "(.*?)"', e._get_reason()) + if name_collision: + target_owner, rsc_name, rsc_type = name_collision.groups() + print("(%s) Cannot migrate to %s because both origin and target users have a %s named '%s'. Please rename the conflicting items or use --data-into-subproject to migrate all users' data into a special subproject." % (email, target_owner, rsc_type[:-1], rsc_name)) + else: + print("(%s) Skipping user migration because of error: %s" % (email, e)) def main(): - - parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html') + parser = argparse.ArgumentParser( + description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html', + parents=[arv_cmd.retry_opt], + ) parser.add_argument( '--version', action='version', version="%s %s" % (sys.argv[0], __version__), help='Print version and exit.') - parser.add_argument('--tokens', type=str, required=False) + parser.add_argument('--tokens', type=str, metavar='FILE', required=False, help="Read tokens from FILE. Not needed when using LoginCluster.") + parser.add_argument('--data-into-subproject', action="store_true", help="Migrate user's data into a separate subproject. This can be used to avoid name collisions from within an account.") group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts") - group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts") - group.add_argument('--dry-run', type=str, help="Consume report .csv and report how user would be migrated to designated Arvados accounts") - group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected") + group.add_argument('--report', type=str, metavar='FILE', help="Generate report .csv file listing users by email address and their associated Arvados accounts.") + group.add_argument('--migrate', type=str, metavar='FILE', help="Consume report .csv and migrate users to designated Arvados accounts.") + group.add_argument('--dry-run', type=str, metavar='FILE', help="Consume report .csv and report how user would be migrated to designated Arvados accounts.") + group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected.") args = parser.parse_args() clusters, errors, loginCluster = connect_clusters(args) @@ -338,8 +388,10 @@ def main(): if new_user_uuid is None: continue - # cluster where the migration is happening + remote_users = {} + got_error = False for migratecluster in clusters: + # cluster where the migration is happening migratearv = clusters[migratecluster] # the user's new home cluster @@ -348,14 +400,22 @@ def main(): newuser = activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_user_uuid) if newuser is None: - continue + got_error = True + remote_users[migratecluster] = newuser + + if not got_error: + for migratecluster in clusters: + migratearv = clusters[migratecluster] + newuser = remote_users[migratecluster] + if newuser is False: + continue - print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster)) + print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster)) - migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid) + migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid) - if newuser['username'] != username: - update_username(args, email, new_user_uuid, username, migratecluster, migratearv) + if newuser['username'] != username: + update_username(args, email, new_user_uuid, username, migratecluster, migratearv) if __name__ == "__main__": main()