# Provenance: sdk/python/arvados/util.py as changed by commit
# 4bcecb9f823b4aaab8bd803aa4e90b3d8be9d92a
# "17022: Produce user activity report from the audit logs"
# (Arvados-DCO-1.1-Signed-off-by: Peter Amstutz).

def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
    """Iterate over every item of a list API call using keyset paging.

    Instead of paging with ``offset``, each request filters on the
    ``order_key`` (and, when needed, ``uuid``) of the last item of the
    previous page.  This is a generator: items are yielded one at a
    time, each exactly once, in the order returned by the API.

    Arguments:

    * fn: callable accepting list-call keyword arguments (``limit``,
      ``count``, ``order``, ``filters``, ...) and returning an object
      whose ``execute(num_retries=...)`` returns a dict with an
      ``"items"`` list.
    * order_key: name of the field the results are ordered and paged by.
    * num_retries: passed through to ``execute()``.
    * ascending: order by ``order_key`` ascending if true, descending
      otherwise.  Ties are always broken by ``uuid`` ascending.
    * kwargs: passed to ``fn``.  ``limit``, ``count`` and ``order`` are
      overridden; ``filters`` are combined with the paging filters; if
      ``select`` is given, ``uuid`` and ``order_key`` are added to a copy
      of it because paging needs both fields.
    """
    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
    other_filters = list(kwargs.get("filters", []))

    if "select" in kwargs:
        # Work on a copy so the caller's list is not modified, and make
        # sure both fields used for paging are actually returned.
        select = list(kwargs["select"])
        for field in ("uuid", order_key):
            if field not in select:
                select.append(field)
        kwargs["select"] = select

    nextpage = []
    lastitem = None
    prev_page_all_same_order_key = False

    # uuids already yielded whose order key equals run_key (the order
    # key of the most recently yielded item).  Only rows with that key
    # can be returned again by the next request, so this is all that is
    # needed to suppress duplicates.
    unset = object()
    run_key = unset
    run_uuids = set()

    while True:
        kwargs["filters"] = nextpage + other_filters
        items = fn(**kwargs).execute(num_retries=num_retries)["items"]

        if not items:
            if prev_page_all_same_order_key:
                # Finished paging by uuid through a run of rows with
                # identical order keys; move on to the next order key.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            return

        for item in items:
            if item[order_key] != run_key:
                # Results are sorted by order key, so a new key means
                # rows with the previous key cannot come back.
                run_key = item[order_key]
                run_uuids = set()
            elif item["uuid"] in run_uuids:
                # Row straddling a page boundary that was already
                # yielded.  Skip it.
                continue
            run_uuids.add(item["uuid"])
            yield item

        firstitem = items[0]
        lastitem = items[-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using uuid for paging.
            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">", lastitem["uuid"]]]
            prev_page_all_same_order_key = True
            # Rows at or before the uuid cursor cannot be returned
            # again; forget them so the set stays small on long runs.
            # Rows past the cursor (e.g. the row excluded by the
            # previous "uuid !=" filter) must still be remembered.
            run_uuids = set(u for u in run_uuids if u > lastitem["uuid"])
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  Other
            # rows sharing that order key will be returned again; they
            # are filtered out above using run_uuids.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
            prev_page_all_same_order_key = False
# Provenance: tools/user-activity/arv-user-activity.py as changed by
# commit 4bcecb9f823b4aaab8bd803aa4e90b3d8be9d92a.  The script-local copy
# of keyset_list_all was removed by that commit in favour of
# arvados.util.keyset_list_all.

import argparse
import sys

import arvados
import arvados.util


def parse_arguments(arguments):
    """Parse the command line and return the argparse namespace.

    NOTE(review): --timespan is accepted but nothing in this script
    reads it yet.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--timespan', type=str)
    return arg_parser.parse_args(arguments)


def getowner(arv, uuid, owners):
    """Walk up the ownership chain of `uuid` until a user uuid is found.

    A uuid whose type infix (characters 6-10) is "tpzed" is treated as a
    user and returned.  Any other uuid is looked up with
    arv.groups().get() and the walk continues with its owner_uuid.

    `owners` is a dict used as a uuid -> owner_uuid cache shared between
    calls; failed lookups are cached as None.

    Returns the user uuid, or None if `uuid` is None, a lookup fails, or
    the ownership chain loops back on itself.
    """
    visited = set()
    while uuid is not None and uuid[6:11] != "tpzed":
        if uuid in visited:
            # Ownership cycle: give up instead of looping forever.
            return None
        visited.add(uuid)

        if uuid not in owners:
            try:
                owners[uuid] = arv.groups().get(uuid=uuid).execute()["owner_uuid"]
            except Exception:
                # Best effort: an owner that cannot be fetched is
                # reported as unknown.  Unlike a bare "except:", this
                # lets KeyboardInterrupt and SystemExit through.
                owners[uuid] = None

        uuid = owners[uuid]
    return uuid


def getusername(arv, uuid):
    """Fetch user `uuid` and return "First Last (uuid)"."""
    u = arv.users().get(uuid=uuid).execute()
    return "%s %s (%s)" % (u["first_name"], u["last_name"], uuid)


def getname(u):
    """Return '"name" (uuid)' for a record dict with name and uuid keys."""
    return "\"%s\" (%s)" % (u["name"], u["uuid"])


def _generic_event_line(e):
    """Describe a log event that has no dedicated report wording."""
    return "%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], e["uuid"])


def _link_event_line(e, attrs, tag_verb, share_verb):
    """Describe a link create/delete event from the link attributes."""
    if attrs["link_class"] == "tag":
        return "%s %s %s" % (e["event_at"], tag_verb, attrs["head_uuid"])
    if attrs["link_class"] == "permission":
        return "%s %s %s with %s" % (e["event_at"], share_verb, attrs["tail_uuid"], attrs["head_uuid"])
    return _generic_event_line(e)


def main(arguments):
    """Print a per-user activity report built from the audit logs.

    Reads log records created on or after a fixed start date, attributes
    each one to the user that ultimately owns the affected object, and
    prints the collected lines grouped by user.
    """
    # Still parsed so that bad options and --help behave as before.
    parse_arguments(arguments)

    arv = arvados.api()

    events = arvados.util.keyset_list_all(
        arv.logs().list,
        filters=[["created_at", ">=", "2020-10-01T14:51:42-05:00"]])

    users = {}
    owners = {}

    for e in events:
        owner = getowner(arv, e["object_owner_uuid"], owners)
        activity = users.setdefault(owner, [])

        event_type = e["event_type"]
        # Characters 6-10 of an Arvados uuid identify the object type.
        kind = e["object_uuid"][6:11]

        if event_type == "create" and kind == "tpzed":
            # Filed under the new user, not under the owner of the user
            # record.  This is part of the elif chain so the event is
            # not also reported through the generic fallback below.
            users.setdefault(e["object_uuid"], []).append(
                "%s User account created" % e["event_at"])

        elif event_type == "update" and kind == "tpzed":
            pass

        elif event_type == "create" and kind == "xvhdp":
            # Only report requests that were not made by another container.
            if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
                activity.append("%s Ran container %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))

        elif event_type == "update" and kind == "xvhdp":
            pass

        elif event_type == "create" and kind == "j7d0g":
            activity.append("%s Created project %s" % (e["event_at"], getname(e["properties"]["new_attributes"])))

        elif event_type == "delete" and kind == "j7d0g":
            activity.append("%s Deleted project %s" % (e["event_at"], getname(e["properties"]["old_attributes"])))

        elif event_type == "update" and kind == "j7d0g":
            activity.append("%s Updated project %s" % (e["event_at"], getname(e["properties"]["new_attributes"])))

        elif event_type in ("create", "update") and kind == "gj3su":
            if activity and activity[-1].endswith("activity"):
                # Merge consecutive entries into one "start to end" line.
                start = activity[-1].split(" ")[0]
                activity[-1] = "%s to %s Account activity" % (start, e["event_at"])
            else:
                activity.append("%s Account activity" % (e["event_at"]))

        elif event_type == "create" and kind == "o0j2j":
            activity.append(_link_event_line(e, e["properties"]["new_attributes"], "Tagged", "Shared"))

        elif event_type == "delete" and kind == "o0j2j":
            activity.append(_link_event_line(e, e["properties"]["old_attributes"], "Untagged", "Unshared"))

        elif event_type == "create" and kind == "4zz18":
            # Collections typed log/output/intermediate are not reported.
            if e["properties"]["new_attributes"]["properties"].get("type") not in ("log", "output", "intermediate"):
                activity.append("%s Created collection %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))

        elif event_type == "update" and kind == "4zz18":
            activity.append("%s Updated collection %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))

        elif event_type == "delete" and kind == "4zz18":
            activity.append("%s Deleted collection %s %s" % (e["event_at"], getname(e["properties"]["old_attributes"]), e["uuid"]))

        else:
            activity.append(_generic_event_line(e))

    for k, v in users.items():
        # Skip events with no resolvable owner and the all-zero user uuid.
        if k is None or k.endswith("-tpzed-000000000000000"):
            continue
        print("%s:" % getusername(arv, k))
        for ev in v:
            print(" %s" % ev)
        print("")


if __name__ == "__main__":
    main(sys.argv[1:])