+ add_accum_rows()
+
+ return rows, by_email, by_username
+
+
def read_migrations(args, by_email, by_username):
    """Load the migration report CSV named by ``--migrate`` or ``--dry-run``.

    Each data row is indexed into the caller-supplied lookup tables as a
    side effect:

    * ``by_email[<email>][<uuid>]`` is set to the row itself.
    * ``by_username["<home cluster>::<username>"]`` is set to the email
      address of the first row that claims that username on that home
      cluster.

    The header row (recognised by the literal ``"email"`` in the email
    column) and blank lines are skipped.

    Returns the list of data rows in file order.  If the same username is
    listed for two different email addresses on one home cluster, an error
    is printed and the process exits with status 1.
    """
    rows = []
    # newline="" hands line-ending handling to the csv module, which is what
    # its documentation asks for when a file object is given to csv.reader.
    with open(args.migrate or args.dry_run, "rt", newline="") as f:
        for r in csv.reader(f):
            if not r:
                # csv.reader yields an empty list for a blank line; indexing
                # it below would raise IndexError, so skip it.
                continue
            if r[EMAIL] == "email":
                # Header row.
                continue
            by_email.setdefault(r[EMAIL], {})[r[UUID]] = r

            homeuuid_and_username = "%s::%s" % (r[HOMECLUSTER], r[USERNAME])
            # setdefault records the first owner of the username and hands
            # back whichever email currently owns it.
            owner = by_username.setdefault(homeuuid_and_username, r[EMAIL])
            if owner != r[EMAIL]:
                print("ERROR: the username '%s' is listed for both '%s' and '%s' on cluster '%s'" % (r[USERNAME], r[EMAIL], owner, r[HOMECLUSTER]))
                # sys.exit rather than the interactive-only ``exit`` helper
                # that the site module installs.
                sys.exit(1)

            rows.append(r)
    return rows
+
def update_username(args, email, user_uuid, username, migratecluster, migratearv):
    """Give ``user_uuid`` the username ``username`` through ``migratearv``.

    The intended change is always announced on stdout.  When
    ``args.dry_run`` is set, nothing else happens.

    Otherwise the API client is asked (with ``bypass_federation=True``) for
    users that already hold ``username``; if any are returned, the first one
    is renamed to ``username + "migrate"`` to free the name, and then
    ``user_uuid`` is renamed to ``username``.  An ``ApiError`` from any of
    these calls is reported on stdout and not re-raised.

    ``email`` and ``migratecluster`` are only used in the printed messages.
    Returns ``None``.
    """
    details = (email, user_uuid, username, migratecluster)
    print("(%s) Updating username of %s to '%s' on %s" % details)
    if args.dry_run:
        return

    def rename(target_uuid, new_name):
        # One users.update call that only touches the username.
        migratearv.users().update(
            uuid=target_uuid,
            bypass_federation=True,
            body={"user": {"username": new_name}},
        ).execute()

    try:
        holders = migratearv.users().list(
            filters=[["username", "=", username]],
            bypass_federation=True,
        ).execute()
        if holders["items"]:
            # There's already a user with the username, move the old user out of the way
            rename(holders["items"][0]["uuid"], username+"migrate")
        rename(user_uuid, username)
    except arvados.errors.ApiError as e:
        print("(%s) Error updating username of %s to '%s' on %s: %s" % (details + (e,)))
+
+
def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters):
    """Pick, or create, the account that ``old_user_uuid`` is migrated to.

    Rows in ``by_email[email]`` are read positionally as
    ``(email, username, uuid, home cluster)``, the same layout as the tuple
    this function inserts for a newly created account.

    * A row whose uuid starts with ``userhome`` is a candidate target.
    * A row whose home cluster equals ``userhome`` but whose username
      differs from ``username`` is a conflict: a message is printed and
      ``None`` is returned.
    * With no candidate, a new user is created on the home cluster (or, when
      ``args.dry_run`` is set, a placeholder uuid is made up).  Any user
      already holding ``username`` there is first renamed to
      ``username + "migrate"``.  The new row is recorded in
      ``by_email[email]``.
    * With more than one candidate a message is printed and ``None`` is
      returned.

    ``clusters`` maps 5-character cluster ids to API clients.  The home
    cluster id is always taken from the first five characters of
    ``userhome``, so a full uuid given as ``userhome`` resolves to its
    cluster instead of raising ``KeyError``, and an unknown cluster is
    reported the same way in dry-run and real runs.

    Returns the uuid of the chosen user, or ``None`` when the migration
    cannot proceed (the reason is printed).
    """
    candidates = []
    for b in by_email[email].values():
        if b[2].startswith(userhome):
            candidates.append(b)
        if b[1] != username and b[3] == userhome:
            print("(%s) Cannot migrate %s, conflicting usernames %s and %s" % (email, old_user_uuid, b[1], username))
            return None

    if not candidates:
        newhomecluster = userhome[0:5]
        if newhomecluster not in clusters:
            print("(%s) Cannot migrate %s, unknown home cluster %s (typo?)" % (email, old_user_uuid, userhome))
            return None
        print("(%s) No user listed with same email to migrate %s to %s, will create new user with username '%s'" % (email, old_user_uuid, userhome, username))
        if args.dry_run:
            # Nothing is created in a dry run; use a recognisably fake uuid.
            new_uuid = "%s-tpzed-xfakexfakexfake" % newhomecluster
        else:
            oldhomearv = clusters[old_user_uuid[0:5]]
            homearv = clusters[newhomecluster]
            try:
                # Copy name and activation state from the account being migrated.
                olduser = oldhomearv.users().get(uuid=old_user_uuid).execute()
                conflicts = homearv.users().list(filters=[["username", "=", username]],
                                                 bypass_federation=True).execute()
                if conflicts["items"]:
                    # Move the current holder of the username out of the way.
                    homearv.users().update(
                        uuid=conflicts["items"][0]["uuid"],
                        bypass_federation=True,
                        body={"user": {"username": username+"migrate"}}).execute()
                user = homearv.users().create(
                    body={"user": {
                        "email": email,
                        "first_name": olduser["first_name"],
                        "last_name": olduser["last_name"],
                        "username": username,
                        "is_active": olduser["is_active"]}}).execute()
            except arvados.errors.ApiError as e:
                print("(%s) Could not create user: %s" % (email, str(e)))
                return None
            new_uuid = user["uuid"]

        tup = (email, username, new_uuid, userhome)
        by_email[email][new_uuid] = tup
        candidates.append(tup)

    if len(candidates) > 1:
        print("(%s) Multiple users listed to migrate %s to %s, use full uuid" % (email, old_user_uuid, userhome))
        return None
    return candidates[0][2]
+
+
def activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_user_uuid):
    """Make sure the new user is known on the migration cluster.

    A token is created for ``new_user_uuid`` through ``homearv``, salted for
    the migration cluster, and used once against that cluster so the new
    user is known there before the merge.  When ``args.dry_run`` is set, no
    token is created, nothing is updated, and canned token and user records
    are used instead; the lookup of the old user is still performed.

    Returns:
      * ``False`` if ``old_user_uuid`` is not found through ``migratearv``;
      * ``None`` if an API call fails, the lookup returns more than one
        user, or the old user is an admin while the new one is not (a
        message is printed in each case);
      * otherwise the new user's record as seen on the migration cluster.
    """
    migratecluster = migratearv._rootDesc["uuidPrefix"]

    # Step 1: obtain a token owned by the new user.
    if args.dry_run:
        newtok = {"uuid": "dry-run", "api_token": "12345"}
    else:
        try:
            newtok = homearv.api_client_authorizations().create(body={
                "api_client_authorization": {'owner_uuid': new_user_uuid}}).execute()
        except arvados.errors.ApiError as e:
            print("(%s) Could not create API token for %s: %s" % (email, new_user_uuid, e))
            return None

    # Step 2: look the old user up on the migration cluster.
    try:
        lookup = migratearv.users().list(filters=[["uuid", "=", old_user_uuid]], bypass_federation=True).execute()
    except arvados.errors.ApiError as e:
        print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e))
        return None
    matches = lookup["items"]
    if len(matches) == 0:
        return False
    if len(matches) != 1:
        print("(%s) Unexpected result" % (email))
        return None
    olduser = matches[0]

    # Step 3: salt the token for the migration cluster:
    # "v2/<token uuid>/<HMAC-SHA1 of the cluster id keyed by the secret>".
    token_uuid = newtok["uuid"]
    digest = hmac.new(newtok["api_token"].encode(),
                      msg=migratecluster.encode(),
                      digestmod=hashlib.sha1).hexdigest()
    salted = 'v2/' + token_uuid + '/' + digest

    # Step 4: access the migration cluster as the new user.
    try:
        ru = urllib.parse.urlparse(migratearv._rootDesc["rootUrl"])
        if args.dry_run:
            newuser = {"is_active": True, "username": email.split('@')[0], "is_admin": False}
        else:
            newuser = arvados.api(host=ru.netloc, token=salted,
                                  insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
    except arvados.errors.ApiError as e:
        print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
        return None

    # Step 5: carry the old account's active state over to the new one.
    if not newuser["is_active"]:
        if olduser["is_active"]:
            print("(%s) Activating user %s on %s" % (email, new_user_uuid, migratecluster))
            if not args.dry_run:
                try:
                    migratearv.users().update(uuid=new_user_uuid, bypass_federation=True,
                                              body={"is_active": True}).execute()
                except arvados.errors.ApiError as e:
                    print("(%s) Could not activate user %s on %s: %s" % (email, new_user_uuid, migratecluster, e))
                    return None

    # Step 6: refuse to merge an admin into a non-admin account.
    if olduser["is_admin"] and not newuser["is_admin"]:
        print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s. Please ensure the user admin status is the same on both clusters. Note that a federated admin account has admin privileges on the entire federation." % (email, old_user_uuid, new_user_uuid, migratecluster))
        return None

    return newuser
+
def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
    """Merge ``old_user_uuid`` into ``new_user_uuid`` through ``migratearv``.

    Does nothing when ``args.dry_run`` is set.  Otherwise:

    * if ``args.data_into_subproject`` is set, a project group owned by the
      new user is created first (with ``ensure_unique_name=True``) and
      becomes the owner passed to the merge;
    * ``users().merge`` is then called with ``redirect_to_new_user=True``.

    An ``ApiError`` is reported on stdout and not re-raised.  When its
    reason text matches a uniqueness violation on ``(owner_uuid, name)``,
    the message names the colliding item and suggests
    ``--data-into-subproject``.  Returns ``None``.
    """
    if args.dry_run:
        return

    try:
        if args.data_into_subproject:
            project = {
                "owner_uuid": new_user_uuid,
                "name": "Migrated from %s (%s)" % (email, old_user_uuid),
                "group_class": "project"
            }
            grp = migratearv.groups().create(body=project, ensure_unique_name=True).execute()
            new_owner_uuid = grp["uuid"]
        else:
            new_owner_uuid = new_user_uuid
        migratearv.users().merge(old_user_uuid=old_user_uuid,
                                 new_user_uuid=new_user_uuid,
                                 new_owner_uuid=new_owner_uuid,
                                 redirect_to_new_user=True).execute()
    except arvados.errors.ApiError as e:
        name_collision = re.search(r'Key \(owner_uuid, name\)=\((.*?), (.*?)\) already exists\.\n.*UPDATE "(.*?)"', e._get_reason())
        if not name_collision:
            print("(%s) Skipping user migration because of error: %s" % (email, e))
            return
        target_owner, rsc_name, rsc_type = name_collision.groups()
        # rsc_type is the name captured after UPDATE; its last character is
        # dropped for the message (presumably plural to singular).
        print("(%s) Cannot migrate to %s because both origin and target users have a %s named '%s'. Please rename the conflicting items or use --data-into-subproject to migrate all users' data into a special subproject." % (email, target_owner, rsc_type[:-1], rsc_name))
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
+ parser.add_argument(
+ '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
+ parser.add_argument('--tokens', type=str, metavar='FILE', required=False, help="Read tokens from FILE. Not needed when using LoginCluster.")
+ parser.add_argument('--data-into-subproject', action="store_true", help="Migrate user's data into a separate subproject. This can be used to avoid name collisions from within an account.")
+ group = parser.add_mutually_exclusive_group(required=True)
+ group.add_argument('--report', type=str, metavar='FILE', help="Generate report .csv file listing users by email address and their associated Arvados accounts.")
+ group.add_argument('--migrate', type=str, metavar='FILE', help="Consume report .csv and migrate users to designated Arvados accounts.")
+ group.add_argument('--dry-run', type=str, metavar='FILE', help="Consume report .csv and report how user would be migrated to designated Arvados accounts.")
+ group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected.")
+ args = parser.parse_args()
+
+ clusters, errors, loginCluster = connect_clusters(args)
+
+ if errors:
+ for e in errors:
+ print("ERROR: "+str(e))
+ exit(1)
+
+ if args.check:
+ print("Tokens file passed checks")
+ exit(0)
+
+ rows, by_email, by_username = fetch_users(clusters, loginCluster)