+ loginCluster = None
+ if args.tokens:
+ print("Reading %s" % args.tokens)
+ with open(args.tokens, "rt") as f:
+ for r in csv.reader(f):
+ if len(r) != 2:
+ continue
+ host = r[0]
+ token = r[1]
+ print("Contacting %s" % (host))
+ arv = arvados.api(host=host, token=token, cache=False)
+ clusters[arv._rootDesc["uuidPrefix"]] = arv
+ else:
+ arv = arvados.api(cache=False)
+ rh = arv._rootDesc["remoteHosts"]
+ tok = arv.api_client_authorizations().current().execute()
+ token = "v2/%s/%s" % (tok["uuid"], tok["api_token"])
+
+ for k,v in rh.items():
+ arv = arvados.api(host=v, token=token, cache=False, insecure=os.environ.get("ARVADOS_API_HOST_INSECURE"))
+ clusters[k] = arv
+
+ for _, arv in clusters.items():
+ config = arv.configs().get().execute()
+ if config["Login"]["LoginCluster"] != "" and loginCluster is None:
+ loginCluster = config["Login"]["LoginCluster"]
+
+ print("Checking that the federation is well connected")
+ for arv in clusters.values():
+ config = arv.configs().get().execute()
+ if loginCluster and config["Login"]["LoginCluster"] != loginCluster and config["ClusterID"] != loginCluster:
+ errors.append("Inconsistent login cluster configuration, expected '%s' on %s but was '%s'" % (loginCluster, config["ClusterID"], config["Login"]["LoginCluster"]))
+ continue
+
+ if arv._rootDesc["revision"] < "20190926":
+ errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 1.5 before running migration." % config["ClusterID"])
+ continue
+
+ try:
+ cur = arv.users().current().execute()
+ except arvados.errors.ApiError as e:
+ errors.append("checking token for %s %s" % (arv._rootDesc["rootUrl"], e))
+ continue
+
+ if not cur["is_admin"]:
+ errors.append("User %s is not admin on %s" % (cur["uuid"], arv._rootDesc["uuidPrefix"]))
+ continue
+
+ for r in clusters:
+ if r != arv._rootDesc["uuidPrefix"] and r not in arv._rootDesc["remoteHosts"]:
+ errors.append("%s is missing from remoteHosts of %s" % (r, arv._rootDesc["uuidPrefix"]))
+ for r in arv._rootDesc["remoteHosts"]:
+ if r != "*" and r not in clusters:
+ print("WARNING: %s is federated with %s but %s is missing from the tokens file or the token is invalid" % (arv._rootDesc["uuidPrefix"], r, r))
+
+ return clusters, errors, loginCluster
+
+
def fetch_users(clusters, loginCluster):
    """Collect each cluster's own user records and build lookup tables.

    Arguments:
    clusters -- dict mapping cluster id prefix -> Arvados API client
    loginCluster -- id of the central LoginCluster, or None/"" when the
        federation has no central login cluster

    Returns a tuple (rows, by_email, by_username):
    rows -- list of (email, username, uuid, home cluster) report tuples
    by_email -- dict: email -> {uuid: row tuple}
    by_username -- dict: "homecluster::username" -> email

    Exits the process with status 1 when the same username on the same
    home cluster is claimed by two different emails.
    """
    rows = []
    by_email = {}
    by_username = {}

    # Gather only users homed on each cluster: a record federated in from
    # another cluster has a uuid that does not start with this cluster id.
    users = []
    for c, arv in clusters.items():
        print("Getting user list from %s" % c)
        ul = arvados.util.list_all(arv.users().list)
        users.extend(l for l in ul if l["uuid"].startswith(c))

    # Users list is sorted by email.
    # Go through users and collect users with same email;
    # when we see a different email (or get to the end)
    # call add_accum_rows() to generate the report rows with
    # the "home cluster" set, and also fill in the by_email table.
    users = sorted(users, key=lambda u: u["email"]+"::"+(u["username"] or "")+"::"+u["uuid"])

    accum = []
    lastemail = None

    def add_accum_rows():
        # The "home cluster" for an email group is unambiguous only when
        # every accumulated record shares one uuid; otherwise it is "".
        # (Previously this set was recomputed once per record in a
        # redundant outer loop; compute it once here.)
        uuids = {a["uuid"] for a in accum}
        homeuuid = ((len(uuids) == 1) and uuids.pop()) or ""
        for a in accum:
            # uuid prefix (first 5 chars) identifies the home cluster;
            # a configured login cluster overrides it.
            r = (a["email"], a["username"], a["uuid"], loginCluster or homeuuid[0:5])
            by_email.setdefault(a["email"], {})
            by_email[a["email"]][a["uuid"]] = r
            homeuuid_and_username = "%s::%s" % (r[HOMECLUSTER], a["username"])
            if homeuuid_and_username not in by_username:
                by_username[homeuuid_and_username] = a["email"]
            elif by_username[homeuuid_and_username] != a["email"]:
                print("ERROR: the username '%s' is listed for both '%s' and '%s' on cluster '%s'" % (r[USERNAME], r[EMAIL], by_username[homeuuid_and_username], r[HOMECLUSTER]))
                exit(1)
            rows.append(r)

    for u in users:
        # Skip the built-in anonymous and system users.
        if u["uuid"].endswith("-anonymouspublic") or u["uuid"].endswith("-000000000000000"):
            continue
        if lastemail is None:
            lastemail = u["email"]
        if u["email"] == lastemail:
            accum.append(u)
        else:
            # Email changed: flush the accumulated group, start a new one.
            add_accum_rows()
            lastemail = u["email"]
            accum = [u]

    # Flush the final group.
    add_accum_rows()

    return rows, by_email, by_username
+
+
+def read_migrations(args, by_email, by_username):
+ rows = []
+ with open(args.migrate or args.dry_run, "rt") as f: