import urllib.parse
import os
import hashlib
+import re
from arvados._version import __version__
+from . import _util as arv_cmd
EMAIL=0
USERNAME=1
host = r[0]
token = r[1]
print("Contacting %s" % (host))
- arv = arvados.api(host=host, token=token, cache=False)
+ arv = arvados.api(host=host, token=token, cache=False, num_retries=args.retries)
clusters[arv._rootDesc["uuidPrefix"]] = arv
else:
- arv = arvados.api(cache=False)
+ arv = arvados.api(cache=False, num_retries=args.retries)
rh = arv._rootDesc["remoteHosts"]
tok = arv.api_client_authorizations().current().execute()
token = "v2/%s/%s" % (tok["uuid"], tok["api_token"])
errors.append("Inconsistent login cluster configuration, expected '%s' on %s but was '%s'" % (loginCluster, config["ClusterID"], config["Login"]["LoginCluster"]))
continue
- if arv._rootDesc["revision"] < "20190926":
- errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 1.5 before running migration." % config["ClusterID"])
+ if arv._rootDesc["revision"] < "20200331":
+ errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 2.0.2 before running migration." % config["ClusterID"])
continue
try:
by_email = {}
by_username = {}
- users = []
- for c, arv in clusters.items():
- print("Getting user list from %s" % c)
- ul = arvados.util.list_all(arv.users().list)
- for l in ul:
- if l["uuid"].startswith(c):
- users.append(l)
+ users = [
+ user
+ for prefix, arv in clusters.items()
+ for user in arvados.util.keyset_list_all(arv.users().list, bypass_federation=True)
+ if user['uuid'].startswith(prefix)
+ ]
# Users list is sorted by email
# Go through users and collect users with same email
# call add_accum_rows() to generate the report rows with
# the "home cluster" set, and also fill in the by_email table.
- users = sorted(users, key=lambda u: u["email"]+"::"+(u["username"] or "")+"::"+u["uuid"])
+ users.sort(key=lambda u: (u["email"], u["username"] or "", u["uuid"]))
accum = []
lastemail = None
def update_username(args, email, user_uuid, username, migratecluster, migratearv):
    """Set the username of ``user_uuid`` to ``username`` on one cluster.

    The intent is always printed.  When ``args.dry_run`` is true nothing
    else happens.  Otherwise, if another account on the cluster already
    holds ``username``, that account is first renamed to
    ``username + "migrate"`` so that the requested name becomes free.

    Args:
        args: Parsed command-line options; only ``dry_run`` is read.
        email: Email of the user being processed (used in messages only).
        user_uuid: UUID of the account whose username is being set.
        username: The username to assign.
        migratecluster: Cluster identifier (used in messages only).
        migratearv: API client for the cluster being modified.

    Returns:
        None.  API errors are reported on stdout rather than raised.
    """
    print("(%s) Updating username of %s to '%s' on %s" % (email, user_uuid, username, migratecluster))
    if args.dry_run:
        return
    try:
        # All requests pass bypass_federation=True, as in the rest of this
        # script's user list/update calls.
        conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute()
        if conflicts["items"]:
            # There's already a user with the username, move the old user out of the way
            migratearv.users().update(uuid=conflicts["items"][0]["uuid"],
                                      bypass_federation=True,
                                      body={"user": {"username": username+"migrate"}}).execute()
        migratearv.users().update(uuid=user_uuid,
                                  bypass_federation=True,
                                  body={"user": {"username": username}}).execute()
    except arvados.errors.ApiError as e:
        print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters):
return None
print("(%s) No user listed with same email to migrate %s to %s, will create new user with username '%s'" % (email, old_user_uuid, userhome, username))
if not args.dry_run:
+ oldhomecluster = old_user_uuid[0:5]
+ oldhomearv = clusters[oldhomecluster]
newhomecluster = userhome[0:5]
homearv = clusters[userhome]
user = None
try:
- conflicts = homearv.users().list(filters=[["username", "=", username]]).execute()
+ olduser = oldhomearv.users().get(uuid=old_user_uuid).execute()
+ conflicts = homearv.users().list(filters=[["username", "=", username]],
+ bypass_federation=True).execute()
if conflicts["items"]:
- homearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
- user = homearv.users().create(body={"user": {"email": email, "username": username}}).execute()
+ homearv.users().update(
+ uuid=conflicts["items"][0]["uuid"],
+ bypass_federation=True,
+ body={"user": {"username": username+"migrate"}}).execute()
+ user = homearv.users().create(
+ body={"user": {
+ "email": email,
+ "first_name": olduser["first_name"],
+ "last_name": olduser["last_name"],
+ "username": username,
+ "is_active": olduser["is_active"]}}).execute()
except arvados.errors.ApiError as e:
print("(%s) Could not create user: %s" % (email, str(e)))
return None
return None
try:
- olduser = migratearv.users().get(uuid=old_user_uuid).execute()
+ findolduser = migratearv.users().list(filters=[["uuid", "=", old_user_uuid]], bypass_federation=True).execute()
+ if len(findolduser["items"]) == 0:
+ return False
+ if len(findolduser["items"]) == 1:
+ olduser = findolduser["items"][0]
+ else:
+ print("(%s) Unexpected result" % (email))
+ return None
except arvados.errors.ApiError as e:
- if e.resp.status != 404:
- print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e))
+ print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e))
return None
salted = 'v2/' + newtok["uuid"] + '/' + hmac.new(newtok["api_token"].encode(),
try:
ru = urllib.parse.urlparse(migratearv._rootDesc["rootUrl"])
if not args.dry_run:
- newuser = arvados.api(host=ru.netloc, token=salted, insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
+ newuser = arvados.api(host=ru.netloc, token=salted,
+ insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
else:
- newuser = {"is_active": True, "username": username}
+ newuser = {"is_active": True, "username": email.split('@')[0], "is_admin": False}
except arvados.errors.ApiError as e:
print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
return None
print("(%s) Activating user %s on %s" % (email, new_user_uuid, migratecluster))
try:
if not args.dry_run:
- migratearv.users().update(uuid=new_user_uuid, body={"is_active": True}).execute()
+ migratearv.users().update(uuid=new_user_uuid, bypass_federation=True,
+ body={"is_active": True}).execute()
except arvados.errors.ApiError as e:
print("(%s) Could not activate user %s on %s: %s" % (email, new_user_uuid, migratecluster, e))
return None
if olduser["is_admin"] and not newuser["is_admin"]:
- print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+ print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s. Please ensure the user admin status is the same on both clusters. Note that a federated admin account has admin privileges on the entire federation." % (email, old_user_uuid, new_user_uuid, migratecluster))
return None
return newuser
def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
    """Merge ``old_user_uuid`` into ``new_user_uuid`` on one cluster.

    Does nothing when ``args.dry_run`` is true.  Otherwise the old
    account is merged into the new one with ``redirect_to_new_user=True``.
    The ``new_owner_uuid`` passed to the merge is the new user itself, or,
    when ``args.data_into_subproject`` is set, a freshly created project
    named "Migrated from <email> (<old uuid>)" owned by the new user.

    Args:
        args: Parsed command-line options; ``dry_run`` and
            ``data_into_subproject`` are read.
        migratearv: API client for the cluster where the merge happens.
        email: Email of the user being migrated (used in messages and in
            the subproject name).
        new_user_uuid: UUID of the account that survives the merge.
        old_user_uuid: UUID of the account being merged away.

    Returns:
        None.  API errors are reported on stdout rather than raised.
    """
    if args.dry_run:
        return
    try:
        new_owner_uuid = new_user_uuid
        if args.data_into_subproject:
            # ensure_unique_name=True is passed so the create request is
            # not expected to fail on a duplicate project name.
            grp = migratearv.groups().create(body={
                "owner_uuid": new_user_uuid,
                "name": "Migrated from %s (%s)" % (email, old_user_uuid),
                "group_class": "project"
            }, ensure_unique_name=True).execute()
            new_owner_uuid = grp["uuid"]
        migratearv.users().merge(old_user_uuid=old_user_uuid,
                                 new_user_uuid=new_user_uuid,
                                 new_owner_uuid=new_owner_uuid,
                                 redirect_to_new_user=True).execute()
    except arvados.errors.ApiError as e:
        # Look for a unique-key violation on (owner_uuid, name) in the error
        # text so a more actionable message can be shown.
        # NOTE(review): relies on the private HttpError._get_reason() helper
        # and on the exact wording of the server/database error - confirm
        # both when upgrading dependencies.
        name_collision = re.search(r'Key \(owner_uuid, name\)=\((.*?), (.*?)\) already exists\.\n.*UPDATE "(.*?)"', e._get_reason())
        if name_collision:
            target_owner, rsc_name, rsc_type = name_collision.groups()
            # rsc_type[:-1] drops the last character of the captured
            # identifier (presumably a plural table name - verify).
            print("(%s) Cannot migrate to %s because both origin and target users have a %s named '%s'. Please rename the conflicting items or use --data-into-subproject to migrate all users' data into a special subproject." % (email, target_owner, rsc_type[:-1], rsc_name))
        else:
            print("(%s) Skipping user migration because of error: %s" % (email, e))
def main():
-
- parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
+ parser = argparse.ArgumentParser(
+ description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html',
+ parents=[arv_cmd.retry_opt],
+ )
parser.add_argument(
'--version', action='version', version="%s %s" % (sys.argv[0], __version__),
help='Print version and exit.')
- parser.add_argument('--tokens', type=str, required=False)
+ parser.add_argument('--tokens', type=str, metavar='FILE', required=False, help="Read tokens from FILE. Not needed when using LoginCluster.")
+ parser.add_argument('--data-into-subproject', action="store_true", help="Migrate user's data into a separate subproject. This can be used to avoid name collisions from within an account.")
group = parser.add_mutually_exclusive_group(required=True)
- group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts")
- group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts")
- group.add_argument('--dry-run', type=str, help="Consume report .csv and report how user would be migrated to designated Arvados accounts")
- group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected")
+ group.add_argument('--report', type=str, metavar='FILE', help="Generate report .csv file listing users by email address and their associated Arvados accounts.")
+ group.add_argument('--migrate', type=str, metavar='FILE', help="Consume report .csv and migrate users to designated Arvados accounts.")
+ group.add_argument('--dry-run', type=str, metavar='FILE', help="Consume report .csv and report how user would be migrated to designated Arvados accounts.")
+ group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected.")
args = parser.parse_args()
clusters, errors, loginCluster = connect_clusters(args)
if new_user_uuid is None:
continue
- # cluster where the migration is happening
+ remote_users = {}
+ got_error = False
for migratecluster in clusters:
+ # cluster where the migration is happening
migratearv = clusters[migratecluster]
# the user's new home cluster
newuser = activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_user_uuid)
if newuser is None:
- continue
+ got_error = True
+ remote_users[migratecluster] = newuser
+
+ if not got_error:
+ for migratecluster in clusters:
+ migratearv = clusters[migratecluster]
+ newuser = remote_users[migratecluster]
+ if newuser is False:
+ continue
- print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+ print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
- migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid)
+ migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid)
- if newuser['username'] != username:
- update_username(args, email, new_user_uuid, username, migratecluster, migratearv)
+ if newuser['username'] != username:
+ update_username(args, email, new_user_uuid, username, migratecluster, migratearv)
if __name__ == "__main__":
main()