Fix 2.4.2 upgrade notes formatting refs #19330
[arvados.git] / tools / user-activity / arvados_user_activity / main.py
1 #!/usr/bin/env python3
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 import argparse
7 import sys
8
9 import arvados
10 import arvados.util
11 import datetime
12 import ciso8601
13 import csv
14
def parse_arguments(arguments):
    """Parse the command line and work out the reporting window.

    The window is chosen in exactly one of two ways: ``--days`` (the window
    ends now and starts that many days earlier) or both ``--start`` and
    ``--end`` (each a ``YYYY-MM-DD`` date, parsed as midnight of that day).

    Args:
        arguments: list of command-line argument strings, without the
            program name.

    Returns:
        A tuple ``(args, since, to)``: the parsed argparse namespace and the
        two naive ``datetime`` bounds of the report.

    Raises:
        SystemExit: with status 1, after printing the help text and an error
            message, when the options are inconsistent or a date cannot be
            parsed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format (UTC)')
    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format (UTC)')
    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
    arg_parser.add_argument('--csv', action='store_true', help='Output in csv format (default: false)')
    args = arg_parser.parse_args(arguments)

    def fail(message):
        # Show usage followed by the specific problem, then stop.
        # sys.exit is used rather than the builtin exit(), which only exists
        # when the site module has been loaded.
        arg_parser.print_help()
        print(message)
        sys.exit(1)

    def parse_date(value, which):
        # Only a malformed date should be reported as a format error; a bare
        # except here would also swallow KeyboardInterrupt and SystemExit.
        try:
            return datetime.datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            fail("\nError: %s date must be in YYYY-MM-DD format" % which)

    if args.days and (args.start or args.end):
        fail("Error: either specify --days or both --start and --end")

    # This also covers "only one of --start/--end given", so no separate
    # check for that case is needed afterwards.
    if not args.days and (not args.start or not args.end):
        fail("\nError: either specify --days or both --start and --end")

    if args.days:
        # Naive UTC "now", computed without the deprecated utcnow().
        to = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        since = to - datetime.timedelta(days=args.days)
    else:
        since = parse_date(args.start, "start")
        to = parse_date(args.end, "end")

    return args, since, to
59
def getowner(arv, uuid, owners):
    """Resolve the user that ultimately owns an object.

    Starting at ``uuid``, follows ``owner_uuid`` links upwards through groups
    until a user uuid (type infix ``tpzed``) is reached.

    Args:
        arv: API client; only ``arv.groups().get(uuid=...).execute()`` is
            called on it.
        uuid: uuid to resolve, or None.
        owners: dict used as a cache, mapping a uuid to its ``owner_uuid``
            (None when the lookup failed). It is updated in place so that
            later calls can reuse the results.

    Returns:
        The uuid of the owning user, or None when ``uuid`` is None, when a
        group lookup fails, or when the ownership chain loops back on itself.
    """
    visited = set()
    while uuid is not None:
        if uuid[6:11] == "tpzed":
            return uuid
        if uuid in visited:
            # Ownership cycle: stop instead of looping forever.
            return None
        visited.add(uuid)

        if uuid not in owners:
            try:
                owners[uuid] = arv.groups().get(uuid=uuid).execute()["owner_uuid"]
            except Exception:
                # Best effort: a group that cannot be fetched has no known
                # owner. KeyboardInterrupt/SystemExit still propagate.
                owners[uuid] = None
        uuid = owners[uuid]
    return None
74
def getuserinfo(arv, uuid):
    """Build the human-readable heading for one user in the text report.

    Args:
        arv: API client; ``arv.users().get(uuid=...).execute()`` and
            ``arv.config()`` are called on it.
        uuid: uuid of the user to describe.

    Returns:
        ``"First Last <email> (<workbench url>users/<uuid>)"`` followed, when
        the user has non-empty profile entries, by one indented
        ``key: "value"`` line per entry. If the user record cannot be
        fetched for any reason, ``"deleted user (<workbench url>users/<uuid>)"``
        is returned instead.
    """
    try:
        u = arv.users().get(uuid=uuid).execute()
    except Exception:
        # Any lookup failure is reported as a deleted user;
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        u = None

    link = "%susers/%s" % (arv.config()["Services"]["Workbench1"]["ExternalURL"], uuid)
    if u is None:
        return "deleted user (%s)" % link

    # Tolerate a missing or null profile; skip entries with empty values.
    profile = u["prefs"].get("profile") or {}
    prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in profile.items() if v)
    if prof:
        prof = "\n" + prof + "\n"
    return "%s %s <%s> (%s)%s" % (u["first_name"], u["last_name"], u["email"], link, prof)
def getuserinfocsv(arv, uuid):
    """Return the user columns that lead every row of the CSV report.

    Args:
        arv: API client; only ``arv.users().get(uuid=...).execute()`` is
            called on it.
        uuid: uuid of the user to describe.

    Returns:
        ``[uuid, first_name, last_name, email]``, or
        ``[uuid, "deleted", "user", ""]`` when the user record cannot be
        fetched for any reason.
    """
    try:
        u = arv.users().get(uuid=uuid).execute()
    except Exception:
        # Keep the column count stable for users that can no longer be
        # looked up; KeyboardInterrupt/SystemExit are no longer swallowed.
        return [uuid, "deleted", "user", ""]
    return [uuid, u["first_name"], u["last_name"], u["email"]]
93
94
# Collection names already resolved in this run, keyed by whichever identifier
# (uuid or portable data hash) was used for the lookup.
collectionNameCache = {}
def getCollectionName(arv, uuid, pdh):
    """Look up the name of a collection, caching the answer.

    The collection is looked up by ``uuid`` when a full-length (27 character)
    uuid is supplied, otherwise by portable data hash.

    Args:
        arv: API client; only ``arv.collections().list(...).execute()`` is
            called on it.
        uuid: collection uuid; may be None or empty when it is not known.
        pdh: portable data hash used as the fallback lookup key.

    Returns:
        The collection's name, or ``"(deleted)"`` when no matching collection
        is returned by the API.
    """
    if uuid and len(uuid) == 27:
        lookupField = uuid
        filters = [["uuid", "=", uuid]]
    else:
        # Look up by pdh. Note that this can be misleading; the download could
        # have happened from a collection with the same pdh but different name.
        # We arbitrarily pick the oldest collection with the pdh to lookup the
        # name, if the uuid for the request is not known.
        lookupField = pdh
        filters = [["portable_data_hash", "=", pdh]]

    if lookupField not in collectionNameCache:
        items = arv.collections().list(filters=filters, order="created_at", limit=1).execute().get("items")
        # Misses are cached too, so repeated events for a missing collection
        # do not each cost an API round trip.
        collectionNameCache[lookupField] = items[0]["name"] if items else "(deleted)"
    return collectionNameCache[lookupField]
116
def getname(u):
    """Render a record as its double-quoted name followed by its uuid in parentheses."""
    label, ident = u["name"], u["uuid"]
    return '"{!s}" ({!s})'.format(label, ident)
119
def main(arguments=None):
    """Print a per-user activity report built from the cluster's log records.

    Log records whose ``created_at`` falls in ``[since, to)`` are fetched and
    turned into ``[log uuid, timestamp, description]`` entries grouped by
    user. Most entries are attributed to the user that ultimately owns the
    logged object (see ``getowner``); account creation and file
    upload/download entries are attributed to the record's ``object_uuid``.
    The report is written to stdout, either as indented text or, with
    ``--csv``, as one CSV row per entry.

    Args:
        arguments: list of command-line argument strings; defaults to
            ``sys.argv[1:]``.
    """
    if arguments is None:
        arguments = sys.argv[1:]

    args, since, to = parse_arguments(arguments)

    arv = arvados.api()

    prefix = ''
    suffix = "\n"
    if args.csv:
        # In CSV mode the heading is emitted as a '#' comment line.
        prefix = '# '
        suffix = ''
    print("%sUser activity on %s between %s and %s%s" % (prefix, arv.config()["ClusterID"],
                                                       since.isoformat(sep=" ", timespec="minutes"),
                                                       to.isoformat(sep=" ", timespec="minutes"), suffix))

    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])

    users = {}
    owners = {}

    for e in events:
        owner = getowner(arv, e["object_owner_uuid"], owners)
        users.setdefault(owner, [])
        event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
        loguuid = e["uuid"]

        etype = e["event_type"]
        # The five-character type infix of the object uuid; only consulted
        # for create/update/delete records.
        kind = e["object_uuid"][6:11] if etype in ("create", "update", "delete") else None

        if etype == "create" and kind == "tpzed":
            users.setdefault(e["object_uuid"], [])
            users[e["object_uuid"]].append([loguuid, event_at, "User account created"])

        elif etype == "update" and kind == "tpzed":
            pass

        elif etype == "create" and kind == "xvhdp":
            # Only report requests that were not submitted by another container.
            if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
                users[owner].append([loguuid, event_at, "Ran container %s" % (getname(e["properties"]["new_attributes"]))])

        elif etype == "update" and kind == "xvhdp":
            pass

        elif etype == "create" and kind == "j7d0g":
            users[owner].append([loguuid, event_at, "Created project %s" % (getname(e["properties"]["new_attributes"]))])

        elif etype == "delete" and kind == "j7d0g":
            users[owner].append([loguuid, event_at, "Deleted project %s" % (getname(e["properties"]["old_attributes"]))])

        elif etype == "update" and kind == "j7d0g":
            users[owner].append([loguuid, event_at, "Updated project %s" % (getname(e["properties"]["new_attributes"]))])

        elif etype in ("create", "update") and kind == "gj3su":
            # Consecutive events of this kind are merged into a single
            # "Account activity" span as long as each one follows the
            # previous one by less than 61 minutes.
            since_last = None
            if len(users[owner]) > 0 and users[owner][-1][-1].endswith("activity"):
                # The previous entry reads "to <date> <time> (<h>:<m>) Account
                # activity"; words 1 and 2 are the end of the span so far.
                sp = users[owner][-1][-1].split(" ")
                start = users[owner][-1][1]
                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[1]+" "+sp[2])
                span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)

            if since_last is not None and since_last < datetime.timedelta(minutes=61):
                users[owner][-1] = [loguuid, start, "to %s (%02d:%02d) Account activity" % (
                    event_at, span.days*24 + span.seconds // 3600, (span.seconds % 3600) // 60)]
            else:
                users[owner].append([loguuid, event_at, "to %s (0:00) Account activity" % (event_at)])

        elif etype == "create" and kind == "o0j2j":
            attrs = e["properties"]["new_attributes"]
            if attrs["link_class"] == "tag":
                # The log uuid must lead the entry like every other entry;
                # without it the text report drops the timestamp and the CSV
                # columns shift for this row.
                users[owner].append([loguuid, event_at, "Tagged %s" % (attrs["head_uuid"])])
            elif attrs["link_class"] == "permission":
                users[owner].append([loguuid, event_at, "Shared %s with %s" % (attrs["tail_uuid"], attrs["head_uuid"])])
            else:
                users[owner].append([loguuid, event_at, "%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])

        elif etype == "delete" and kind == "o0j2j":
            attrs = e["properties"]["old_attributes"]
            if attrs["link_class"] == "tag":
                users[owner].append([loguuid, event_at, "Untagged %s" % (attrs["head_uuid"])])
            elif attrs["link_class"] == "permission":
                users[owner].append([loguuid, event_at, "Unshared %s with %s" % (attrs["tail_uuid"], attrs["head_uuid"])])
            else:
                users[owner].append([loguuid, event_at, "%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])

        elif etype == "create" and kind == "4zz18":
            # Collections typed log/output/intermediate are not reported.
            if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                pass
            else:
                users[owner].append([loguuid, event_at, "Created collection %s" % (getname(e["properties"]["new_attributes"]))])

        elif etype == "update" and kind == "4zz18":
            users[owner].append([loguuid, event_at, "Updated collection %s" % (getname(e["properties"]["new_attributes"]))])

        elif etype == "delete" and kind == "4zz18":
            if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                pass
            else:
                users[owner].append([loguuid, event_at, "Deleted collection %s" % (getname(e["properties"]["old_attributes"]))])

        elif etype == "file_download":
            props = e["properties"]
            users.setdefault(e["object_uuid"], [])
            users[e["object_uuid"]].append([loguuid, event_at, "Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (
                props.get("collection_file_path") or props.get("reqPath"),
                getCollectionName(arv, props.get("collection_uuid"), props.get("portable_data_hash")),
                props.get("collection_uuid"),
                props.get("portable_data_hash"))])

        elif etype == "file_upload":
            props = e["properties"]
            users.setdefault(e["object_uuid"], [])
            users[e["object_uuid"]].append([loguuid, event_at, "Uploaded file \"%s\" to \"%s\" (%s)" % (
                props.get("collection_file_path") or props.get("reqPath"),
                getCollectionName(arv, props.get("collection_uuid"), props.get("portable_data_hash")),
                props.get("collection_uuid"))])

        else:
            users[owner].append([loguuid, event_at, "%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])

    if args.csv:
        csvwriter = csv.writer(sys.stdout, dialect='unix')

    for k, v in users.items():
        # Skip entries with no resolvable owner and the all-zero user uuid
        # (presumably the cluster's system user -- confirm).
        if k is None or k.endswith("-tpzed-000000000000000"):
            continue
        if not args.csv:
            print(getuserinfo(arv, k))
            for ev in v:
                # Leave out the log entry uuid, this report is intended for human consumption
                print("  %s" % ' '.join(ev[1:]))
            print("")
        else:
            user = getuserinfocsv(arv, k)
            for ev in v:
                csvwriter.writerow(user + ev)

# Run the report when this file is executed as a script.
if __name__ == "__main__":
    main()