From: Ward Vandewege Date: Sat, 26 Mar 2022 00:32:27 +0000 (-0400) Subject: 18903: Merge branch 'main' into 18903-fix-activity-script X-Git-Tag: 2.5.0~232^2~2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/c3c1f4261f61bc52dbc8fadad644520797b3f6a6?hp=ff635ccb09b0b79663b0220e16b8a0ef00997f5d 18903: Merge branch 'main' into 18903-fix-activity-script Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 97ec95e3aa..ef61b06873 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -913,6 +913,14 @@ func (h *handler) logUploadOrDownload( WithField("collection_file_path", filepath) props["collection_uuid"] = collection.UUID props["collection_file_path"] = filepath + // h.determineCollection populates the collection_uuid prop with the PDH, if + // this collection is being accessed via PDH. In that case, blank the + // collection_uuid field so that consumers of the log entries can rely on it + // being a UUID, or blank. The PDH remains available via the + // portable_data_hash property. + if props["collection_uuid"] == collection.PortableDataHash { + props["collection_uuid"] = "" + } } if r.Method == "PUT" || r.Method == "POST" { log.Info("File upload") diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py index 997da57e05..f078b81544 100755 --- a/tools/user-activity/arvados_user_activity/main.py +++ b/tools/user-activity/arvados_user_activity/main.py @@ -13,8 +13,26 @@ import ciso8601 def parse_arguments(arguments): arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--days', type=int, required=True) + arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format') + arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format') + arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report') args = arg_parser.parse_args(arguments) + + if args.days and (args.start or args.end): + p.print_help() + print("Error: either specify --days or both --start and --end") + exit(1) + + if not args.days and (not args.start or not args.end): + p.print_help() + print("Error: either specify --days or both --start and --end") + exit(1) + + if (args.start and not args.end) or (args.end and not args.start): + p.print_help() + print("Error: no start or end date found, either specify --days or both --start and --end") + exit(1) + return args def getowner(arv, uuid, owners): @@ -33,7 +51,11 @@ def getowner(arv, uuid, owners): return getowner(arv, owners[uuid], owners) def getuserinfo(arv, uuid): - u = arv.users().get(uuid=uuid).execute() + try: + u = arv.users().get(uuid=uuid).execute() + except: + return "deleted user (%susers/%s)" % (arv.config()["Services"]["Workbench1"]["ExternalURL"], + uuid) prof = "\n".join(" %s: \"%s\"" % (k, v) for k, v in u["prefs"].get("profile", {}).items() if v) if prof: prof = "\n"+prof+"\n" @@ -42,11 +64,13 @@ def getuserinfo(arv, uuid): uuid, prof) collectionNameCache = {} -def getCollectionName(arv, uuid): - if uuid not in collectionNameCache: - u = arv.collections().get(uuid=uuid).execute() - collectionNameCache[uuid] = u["name"] - return collectionNameCache[uuid] +def getCollectionName(arv, pdh): + if pdh not in collectionNameCache: + u = arv.collections().list(filters=[["portable_data_hash","=",pdh]]).execute().get("items") + if len(u) < 1: + return "(deleted)" + collectionNameCache[pdh] = u[0]["name"] + return collectionNameCache[pdh] def getname(u): return "\"%s\" (%s)" % (u["name"], u["uuid"]) @@ -59,13 +83,31 @@ def main(arguments=None): arv = arvados.api() - since = datetime.datetime.utcnow() - datetime.timedelta(days=args.days) + if args.days: + to = datetime.datetime.utcnow() + since = to - datetime.timedelta(days=args.days) + + if args.start: + try: + since = datetime.datetime.strptime(args.start,"%Y-%m-%d") + except: + p.print_help() + print("Error: start date must be in YYYY-MM-DD format") + exit(1) + + if args.end: + try: + to = datetime.datetime.strptime(args.end,"%Y-%m-%d") + except: + p.print_help() + print("Error: end date must be in YYYY-MM-DD format") + exit(1) print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"], - (datetime.datetime.now() - datetime.timedelta(days=args.days)).isoformat(sep=" ", timespec="minutes"), - datetime.datetime.now().isoformat(sep=" ", timespec="minutes"))) + since.isoformat(sep=" ", timespec="minutes"), + to.isoformat(sep=" ", timespec="minutes"))) - events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()]]) + events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]]) users = {} owners = {} @@ -119,7 +161,7 @@ def main(arguments=None): elif e["properties"]["new_attributes"]["link_class"] == "permission": users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"])) else: - users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) + users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j": if e["properties"]["old_attributes"]["link_class"] == "tag": @@ -127,7 +169,7 @@ def main(arguments=None): elif e["properties"]["old_attributes"]["link_class"] == "permission": users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"])) else: - users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) + users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18": if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"): @@ -145,20 +187,22 @@ def main(arguments=None): users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid)) elif e["event_type"] == "file_download": + users.setdefault(e["object_uuid"], []) users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at, e["properties"].get("collection_file_path") or e["properties"].get("reqPath"), - getCollectionName(arv, e["properties"].get("collection_uuid")), + getCollectionName(arv, e["properties"].get("portable_data_hash")), e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash"))) elif e["event_type"] == "file_upload": + users.setdefault(e["object_uuid"], []) users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at, e["properties"].get("collection_file_path") or e["properties"].get("reqPath"), - getCollectionName(arv, e["properties"].get("collection_uuid")), + getCollectionName(arv, e["properties"].get("portable_data_hash")), e["properties"].get("collection_uuid"))) else: - users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) + users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid)) for k,v in users.items(): if k is None or k.endswith("-tpzed-000000000000000"):