17022: Produce user activity report from the audit logs
author Peter Amstutz <peter.amstutz@curii.com>
Fri, 13 Nov 2020 23:17:54 +0000 (18:17 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Wed, 18 Nov 2020 18:52:21 +0000 (13:52 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/python/arvados/util.py
tools/user-activity/arv-user-activity.py [changed mode: 0644->0755]

index 6c9822e9f0325ec82cf68dc413843a9499755942..2380e48b734005505f125a05f453e6b88c76265c 100644 (file)
@@ -388,6 +388,97 @@ def list_all(fn, num_retries=0, **kwargs):
         offset = c['offset'] + len(c['items'])
     return items
 
+def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
+    """Yield every item from an API list call, using keyset paging.
+
+    Rather than sending an offset, each request filters on the
+    order_key and uuid of the last item of the previous page.  Items
+    already yielded from the previous page are skipped if they are
+    returned again.
+
+    Arguments:
+
+    fn -- list method to call, such as arv.logs().list.  It is called
+    as fn(**kwargs).execute(num_retries=num_retries) and must return a
+    dict with an "items" list.
+
+    order_key -- field to page on.  Every returned item must have
+    this field and "uuid".
+
+    num_retries -- passed to every execute() call.
+
+    ascending -- sort by order_key ascending if true, descending if
+    false.
+
+    kwargs -- passed through to fn.  "limit", "count" and "order" are
+    overwritten.  "filters" are combined with the paging filters.  If
+    "select" is given, "uuid" and order_key are added to a copy of it
+    because paging reads both fields from every page.
+    """
+    kwargs["limit"] = 1000
+    kwargs["count"] = 'none'
+    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
+    other_filters = kwargs.get("filters", [])
+
+    if "select" in kwargs:
+        # Copy before adding fields so the caller's list is not modified.
+        select = list(kwargs["select"])
+        for field in ("uuid", order_key):
+            if field not in select:
+                select.append(field)
+        kwargs["select"] = select
+
+    nextpage = []
+    seen_prevpage = set()
+    seen_thispage = set()
+    lastitem = None
+    prev_page_all_same_order_key = False
+
+    while True:
+        kwargs["filters"] = nextpage+other_filters
+        items = fn(**kwargs).execute(num_retries=num_retries)
+
+        if len(items["items"]) == 0:
+            if prev_page_all_same_order_key:
+                # Paging by uuid within one order key is exhausted.
+                # Continue with the rows past that order key.
+                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
+                prev_page_all_same_order_key = False
+                continue
+            else:
+                return
+
+        seen_prevpage = seen_thispage
+        seen_thispage = set()
+
+        for i in items["items"]:
+            # In cases where there's more than one record with the
+            # same order key, the result could include records we
+            # already saw in the last page.  Skip them.
+            if i["uuid"] in seen_prevpage:
+                continue
+            seen_thispage.add(i["uuid"])
+            yield i
+
+        firstitem = items["items"][0]
+        lastitem = items["items"][-1]
+
+        if firstitem[order_key] == lastitem[order_key]:
+            # Got a page where every item has the same order key.
+            # Switch to using uuid for paging.
+            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">", lastitem["uuid"]]]
+            prev_page_all_same_order_key = True
+        else:
+            # Start from the last order key seen, but skip the last
+            # known uuid to avoid retrieving the same row twice.  If
+            # there are multiple rows with the same order key it is
+            # still likely we'll end up retrieving duplicate rows.
+            # That's handled by tracking the "seen" rows for each page
+            # so they can be skipped if they show up on the next page.
+            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
+            prev_page_all_same_order_key = False
+
+
 def ca_certs_path(fallback=httplib2.CA_CERTS):
     """Return the path of the best available CA certs source.
 
old mode 100644 (file)
new mode 100755 (executable)
index e5e0f53..c8b5365
@@ -9,99 +9,140 @@ import sys
 import arvados
 import arvados.util
 
-def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
-    pagesize = 1000
-    kwargs["limit"] = pagesize
-    kwargs["count"] = 'none'
-    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
-    other_filters = kwargs.get("filters", [])
-
-    if "select" in kwargs and "uuid" not in kwargs["select"]:
-        kwargs["select"].append("uuid")
-
-    nextpage = []
-    tot = 0
-    expect_full_page = True
-    seen_prevpage = set()
-    seen_thispage = set()
-    lastitem = None
-    prev_page_all_same_order_key = False
-
-    while True:
-        kwargs["filters"] = nextpage+other_filters
-        items = fn(**kwargs).execute(num_retries=num_retries)
-
-        if len(items["items"]) == 0:
-            if prev_page_all_same_order_key:
-                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
-                prev_page_all_same_order_key = False
-                continue
-            else:
-                return
-
-        seen_prevpage = seen_thispage
-        seen_thispage = set()
-
-        for i in items["items"]:
-            # In cases where there's more than one record with the
-            # same order key, the result could include records we
-            # already saw in the last page.  Skip them.
-            if i["uuid"] in seen_prevpage:
-                continue
-            seen_thispage.add(i["uuid"])
-            yield i
-
-        firstitem = items["items"][0]
-        lastitem = items["items"][-1]
-
-        if firstitem[order_key] == lastitem[order_key]:
-            # Got a page where every item has the same order key.
-            # Switch to using uuid for paging.
-            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">", lastitem["uuid"]]]
-            prev_page_all_same_order_key = True
-        else:
-            # Start from the last order key seen, but skip the last
-            # known uuid to avoid retrieving the same row twice.  If
-            # there are multiple rows with the same order key it is
-            # still likely we'll end up retrieving duplicate rows.
-            # That's handled by tracking the "seen" rows for each page
-            # so they can be skipped if they show up on the next page.
-            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
-            prev_page_all_same_order_key = False
-
-
 def parse_arguments(arguments):
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument('--timespan', type=str)
     args = arg_parser.parse_args(arguments)
     return args
 
+def getowner(arv, uuid, owners):
+    """Return the uuid of the user that ultimately owns uuid.
+
+    Follows owner_uuid links through group records until it reaches
+    a uuid whose type infix (characters 6-10) is "tpzed".  Returns
+    None if uuid is None or if a group lookup fails on the way.
+
+    owners caches group uuid -> owner uuid between calls; a failed
+    lookup is cached as None.
+    """
+    if uuid is None:
+        return None
+    if uuid[6:11] == "tpzed":
+        return uuid
+
+    if uuid not in owners:
+        try:
+            gp = arv.groups().get(uuid=uuid).execute()
+            owners[uuid] = gp["owner_uuid"]
+        except Exception:
+            # Best effort: an owner that cannot be fetched as a group
+            # is recorded as unknown.  Unlike a bare except, this
+            # lets KeyboardInterrupt and SystemExit propagate.
+            owners[uuid] = None
+
+    return getowner(arv, owners[uuid], owners)
+
+def getusername(arv, uuid):
+    """Fetch the user record for uuid and return "first last (uuid)"."""
+    u = arv.users().get(uuid=uuid).execute()
+    return "%s %s (%s)" % (u["first_name"], u["last_name"], uuid)
+
+def getname(u):
+    """Return u's name in double quotes followed by its uuid in parentheses."""
+    return "\"%s\" (%s)" % (u["name"], u["uuid"])
+
 def main(arguments):
+    """Print, for each user, the activity found in the audit logs.
+
+    Reads log records created since a fixed date, files each event
+    under the user returned by getowner() (new-account events go to
+    the created user), then prints every user's events.  The
+    --timespan argument is parsed but not used here.
+    """
     args = parse_arguments(arguments)
 
     arv = arvados.api()
 
-    events = keyset_list_all(arv.logs().list, filters=[["created_at", ">=", "2020-11-05T14:51:42-05:00"]])
+    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", "2020-10-01T14:51:42-05:00"]])
 
     users = {}
+    owners = {}
 
     for e in events:
+        owner = getowner(arv, e["object_owner_uuid"], owners)
+        users.setdefault(owner, [])
+
         if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
             users.setdefault(e["object_uuid"], [])
-            users[e["object_uuid"]].append("User was created")
+            users[e["object_uuid"]].append("%s User account created" % e["event_at"])
+        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
+            # User record updates are not reported.
+            pass
+        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
+            # Only requests with no requesting container are reported.
+            if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
+                users[owner].append("%s Ran container %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))
+
+        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
+            pass
+
+        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
+            users[owner].append("%s Created project %s" %  (e["event_at"], getname(e["properties"]["new_attributes"])))
+
+        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
+            users[owner].append("%s Deleted project %s" % (e["event_at"], getname(e["properties"]["old_attributes"])))
+
+        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
+            users[owner].append("%s Updated project %s" % (e["event_at"], getname(e["properties"]["new_attributes"])))
+
+        elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
+            # Fold consecutive activity entries into a single
+            # "<first time> to <latest time>" line.
+            if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
+                sp = users[owner][-1].split(" ")
+                users[owner][-1] = "%s to %s Account activity" % (sp[0], e["event_at"])
+            else:
+                users[owner].append("%s Account activity" % (e["event_at"]))
+
+        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
+            if e["properties"]["new_attributes"]["link_class"] == "tag":
+                users[owner].append("%s Tagged %s" % (e["event_at"], e["properties"]["new_attributes"]["head_uuid"]))
+            elif e["properties"]["new_attributes"]["link_class"] == "permission":
+                users[owner].append("%s Shared %s with %s" % (e["event_at"], e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
+            else:
+                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], e["uuid"]))
+
+        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
+            if e["properties"]["old_attributes"]["link_class"] == "tag":
+                users[owner].append("%s Untagged %s" % (e["event_at"], e["properties"]["old_attributes"]["head_uuid"]))
+            elif e["properties"]["old_attributes"]["link_class"] == "permission":
+                users[owner].append("%s Unshared %s with %s" % (e["event_at"], e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
+            else:
+                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], e["uuid"]))
 
-        if e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
-            users.setdefault(e["object_owner_uuid"], [])
-            users[e["object_owner_uuid"]].append("Ran a container")
+        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
+            # Collections typed log, output or intermediate are not reported.
+            if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
+                pass
+            else:
+                users[owner].append("%s Created collection %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))
+
+        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
+            users[owner].append("%s Updated collection %s %s" % (e["event_at"], getname(e["properties"]["new_attributes"]), e["uuid"]))
 
-        if e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
-            users.setdefault(e["object_owner_uuid"], [])
-            users[e["object_owner_uuid"]].append("Created a project")
+        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
+            users[owner].append("%s Deleted collection %s %s" % (e["event_at"], getname(e["properties"]["old_attributes"]), e["uuid"]))
+
+        else:
+            users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], e["uuid"]))
 
     for k,v in users.items():
-        print("%s:" % k)
+        # Skip events with no resolved owner, and the all-zero user
+        # uuid (presumably the system user -- confirm).
+        if k is None or k.endswith("-tpzed-000000000000000"):
+            continue
+        print("%s:" % getusername(arv, k))
         for ev in v:
             print("  %s" % ev)
-
+        print("")
 
 main(sys.argv[1:])