18903: Merge branch 'main' into 18903-fix-activity-script
author Ward Vandewege <ward@curii.com>
Sat, 26 Mar 2022 00:32:27 +0000 (20:32 -0400)
committer Ward Vandewege <ward@curii.com>
Sat, 26 Mar 2022 00:32:27 +0000 (20:32 -0400)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

services/keep-web/handler.go
tools/user-activity/arvados_user_activity/main.py

index 97ec95e3aac3f96111ab49014635ae742073b4e8..ef61b06873c50661bb29f622bfb1b5e9a1097495 100644 (file)
@@ -913,6 +913,14 @@ func (h *handler) logUploadOrDownload(
                        WithField("collection_file_path", filepath)
                props["collection_uuid"] = collection.UUID
                props["collection_file_path"] = filepath
+               // h.determineCollection populates the collection_uuid prop with the PDH, if
+               // this collection is being accessed via PDH. In that case, blank the
+               // collection_uuid field so that consumers of the log entries can rely on it
+               // being a UUID, or blank. The PDH remains available via the
+               // portable_data_hash property.
+               if props["collection_uuid"] == collection.PortableDataHash {
+                       props["collection_uuid"] = ""
+               }
        }
        if r.Method == "PUT" || r.Method == "POST" {
                log.Info("File upload")
index 997da57e052db81a25306507b23b3f60935b129e..f078b81544cefb0db8566497d15c69c5e935507f 100755 (executable)
@@ -13,8 +13,26 @@ import ciso8601
 
 def parse_arguments(arguments):
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('--days', type=int, required=True)
+    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format')
+    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format')
+    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
     args = arg_parser.parse_args(arguments)
+
+    if args.days and (args.start or args.end):
+        p.print_help()
+        print("Error: either specify --days or both --start and --end")
+        exit(1)
+
+    if not args.days and (not args.start or not args.end):
+        p.print_help()
+        print("Error: either specify --days or both --start and --end")
+        exit(1)
+
+    if (args.start and not args.end) or (args.end and not args.start):
+        p.print_help()
+        print("Error: no start or end date found, either specify --days or both --start and --end")
+        exit(1)
+
     return args
 
 def getowner(arv, uuid, owners):
@@ -33,7 +51,11 @@ def getowner(arv, uuid, owners):
     return getowner(arv, owners[uuid], owners)
 
 def getuserinfo(arv, uuid):
-    u = arv.users().get(uuid=uuid).execute()
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except:
+        return "deleted user (%susers/%s)" % (arv.config()["Services"]["Workbench1"]["ExternalURL"],
+                                                       uuid)
     prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in u["prefs"].get("profile", {}).items() if v)
     if prof:
         prof = "\n"+prof+"\n"
@@ -42,11 +64,13 @@ def getuserinfo(arv, uuid):
                                                        uuid, prof)
 
 collectionNameCache = {}
-def getCollectionName(arv, uuid):
-    if uuid not in collectionNameCache:
-        u = arv.collections().get(uuid=uuid).execute()
-        collectionNameCache[uuid] = u["name"]
-    return collectionNameCache[uuid]
+def getCollectionName(arv, pdh):
+    """Return the name of the first collection found with this PDH, or "(deleted)" if none."""
+    if pdh not in collectionNameCache:
+        u = arv.collections().list(filters=[["portable_data_hash","=",pdh]]).execute().get("items")
+        # Cache the miss as well, so a deleted collection is only queried once.
+        collectionNameCache[pdh] = u[0]["name"] if u else "(deleted)"
+    return collectionNameCache[pdh]
 
 def getname(u):
     return "\"%s\" (%s)" % (u["name"], u["uuid"])
@@ -59,13 +83,31 @@ def main(arguments=None):
 
     arv = arvados.api()
 
-    since = datetime.datetime.utcnow() - datetime.timedelta(days=args.days)
+    if args.days:
+        to = datetime.datetime.utcnow()
+        since = to - datetime.timedelta(days=args.days)
+
+    if args.start:
+        try:
+            since = datetime.datetime.strptime(args.start,"%Y-%m-%d")
+        except:
+            p.print_help()
+            print("Error: start date must be in YYYY-MM-DD format")
+            exit(1)
+
+    if args.end:
+        try:
+            to = datetime.datetime.strptime(args.end,"%Y-%m-%d")
+        except:
+            p.print_help()
+            print("Error: end date must be in YYYY-MM-DD format")
+            exit(1)
 
     print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
-                                                       (datetime.datetime.now() - datetime.timedelta(days=args.days)).isoformat(sep=" ", timespec="minutes"),
-                                                       datetime.datetime.now().isoformat(sep=" ", timespec="minutes")))
+                                                       since.isoformat(sep=" ", timespec="minutes"),
+                                                       to.isoformat(sep=" ", timespec="minutes")))
 
-    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()]])
+    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])
 
     users = {}
     owners = {}
@@ -119,7 +161,7 @@ def main(arguments=None):
             elif e["properties"]["new_attributes"]["link_class"] == "permission":
                 users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
         elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
             if e["properties"]["old_attributes"]["link_class"] == "tag":
@@ -127,7 +169,7 @@ def main(arguments=None):
             elif e["properties"]["old_attributes"]["link_class"] == "permission":
                 users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
             if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
@@ -145,20 +187,22 @@ def main(arguments=None):
                 users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
 
         elif e["event_type"] == "file_download":
+                users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                       getCollectionName(arv, e["properties"].get("portable_data_hash")),
                                                                                        e["properties"].get("collection_uuid"),
                                                                                        e["properties"].get("portable_data_hash")))
 
         elif e["event_type"] == "file_upload":
+                users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
                                                                                     e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                    getCollectionName(arv, e["properties"].get("portable_data_hash")),
                                                                                     e["properties"].get("collection_uuid")))
 
         else:
-            users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+            users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
     for k,v in users.items():
         if k is None or k.endswith("-tpzed-000000000000000"):