Merge branch '18940-go-settings-conf'
[arvados.git] / tools / user-activity / arvados_user_activity / main.py
1 #!/usr/bin/env python3
2 # Copyright (C) The Arvados Authors. All rights reserved.
3 #
4 # SPDX-License-Identifier: AGPL-3.0
5
6 import argparse
7 import sys
8
9 import arvados
10 import arvados.util
11 import datetime
12 import ciso8601
13
def parse_arguments(arguments):
    """Parse the command line and work out the reporting window.

    The window is given either as ``--days N`` (the N days ending now, in
    UTC) or as an explicit ``--start``/``--end`` pair of YYYY-MM-DD dates.
    Mixing the two forms, or supplying only half of the pair, is an error.

    Args:
        arguments: list of command line argument strings (without the
            program name).

    Returns:
        A tuple ``(args, since, to)``: the argparse namespace followed by
        two naive ``datetime`` objects bounding the report.

    Raises:
        SystemExit: with status 1, after printing the help text and an
            error message, when the arguments are inconsistent or a date
            does not match YYYY-MM-DD.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format (UTC)')
    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format (UTC)')
    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
    args = arg_parser.parse_args(arguments)

    def fail(message):
        # Usage errors show the full help first, then the specific problem.
        # sys.exit is used rather than the exit() builtin, which only exists
        # when the site module has been loaded.
        arg_parser.print_help()
        print("\nError: %s" % message)
        sys.exit(1)

    def parse_date(value, label):
        # Only ValueError (malformed date) is a usage error; anything else
        # should propagate instead of being reported as a bad date.
        try:
            return datetime.datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            fail("%s date must be in YYYY-MM-DD format" % label)

    # NOTE: args.days is tested for truthiness, so "--days 0" is treated the
    # same as not passing --days at all.
    if args.days and (args.start or args.end):
        fail("either specify --days or both --start and --end")

    if not args.days:
        # Check for a half-specified pair first so that it gets the more
        # specific message; the generic one covers "nothing given at all".
        if bool(args.start) != bool(args.end):
            fail("no start or end date found, either specify --days or both --start and --end")
        if not args.start:
            fail("either specify --days or both --start and --end")

    if args.days:
        # Naive UTC "now", obtained without the deprecated utcnow().
        to = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        since = to - datetime.timedelta(days=args.days)
    else:
        since = parse_date(args.start, "start")
        to = parse_date(args.end, "end")

    return args, since, to
57
def getowner(arv, uuid, owners):
    """Follow the ownership chain of *uuid* up to the user that owns it.

    Args:
        arv: API client; ``arv.groups().get(uuid=...).execute()`` is used to
            look up the ``owner_uuid`` of anything not yet in *owners*.
        uuid: uuid to start from, or None.
        owners: dict used as a cache mapping a uuid to its owner's uuid (or
            to None when the lookup failed). It is updated in place.

    Returns:
        The first uuid in the chain whose type infix (characters 6-10) is
        "tpzed", or None if the chain ends, a lookup fails, or the chain
        loops back on itself.
    """
    visited = set()
    while uuid is not None:
        if uuid[6:11] == "tpzed":
            return uuid

        # A cycle would otherwise never terminate; treat it as "no owner".
        if uuid in visited:
            return None
        visited.add(uuid)

        if uuid not in owners:
            try:
                owners[uuid] = arv.groups().get(uuid=uuid).execute()["owner_uuid"]
            except Exception:
                # Best effort: an unreadable or missing group simply has no
                # known owner. KeyboardInterrupt/SystemExit still propagate.
                owners[uuid] = None

        uuid = owners[uuid]

    return None
72
def getuserinfo(arv, uuid):
    """Build the heading printed above one user's activity.

    Args:
        arv: API client; provides ``users().get(...)`` for the user record
            and ``config()`` for the Workbench1 external URL.
        uuid: uuid of the user to describe.

    Returns:
        ``"First Last <email> (<workbench>users/<uuid>)"`` followed by one
        indented ``key: "value"`` line per non-empty profile entry, or
        ``"deleted user (<workbench>users/<uuid>)"`` when the user record
        cannot be fetched.
    """
    link = "%susers/%s" % (arv.config()["Services"]["Workbench1"]["ExternalURL"], uuid)

    try:
        u = arv.users().get(uuid=uuid).execute()
    except Exception:
        # Any API failure is reported as a deleted user, but interrupts and
        # interpreter exit requests are allowed to propagate.
        return "deleted user (%s)" % link

    # Tolerate a missing or null prefs/profile instead of crashing.
    profile = (u.get("prefs") or {}).get("profile") or {}
    prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in profile.items() if v)
    if prof:
        prof = "\n" + prof + "\n"

    return "%s %s <%s> (%s)%s" % (u["first_name"], u["last_name"], u["email"], link, prof)
85
# Maps a collection uuid or portable data hash to the name found for it (or
# to "(deleted)" when nothing matched), so each key is looked up only once.
collectionNameCache = {}
def getCollectionName(arv, uuid, pdh):
    """Return the name of the collection identified by *uuid* or *pdh*.

    Args:
        arv: API client; ``arv.collections().list(...)`` performs the lookup.
        uuid: collection uuid, or None/short/empty when it is not known.
        pdh: portable data hash, used only when *uuid* is not a full-length
            (27 character) uuid.

    Returns:
        The collection's name, or "(deleted)" if no collection matches.
    """
    # Look up by uuid if it is available, fall back to look up by pdh.
    if uuid is not None and len(uuid) == 27:
        lookupField, attribute = uuid, "uuid"
    else:
        # Look up by pdh. Note that this can be misleading; the download could
        # have happened from a collection with the same pdh but different name.
        # We arbitrarily pick the oldest collection with the pdh to lookup the
        # name, if the uuid for the request is not known.
        lookupField, attribute = pdh, "portable_data_hash"

    if lookupField not in collectionNameCache:
        found = arv.collections().list(filters=[[attribute, "=", lookupField]],
                                       order="created_at", limit=1).execute().get("items")
        # Cache misses too, so events for a deleted collection do not each
        # trigger another API request.
        collectionNameCache[lookupField] = found[0]["name"] if found else "(deleted)"

    return collectionNameCache[lookupField]
107
def getname(u):
    """Format a record as its double-quoted name followed by its uuid in parentheses."""
    name, uuid = u["name"], u["uuid"]
    return '"{}" ({})'.format(name, uuid)
110
def main(arguments=None):
    """Print a per-user activity report built from the cluster's log records.

    Log records whose created_at falls in [since, to) are fetched, each is
    turned into a one-line description and filed under a user uuid, and
    finally every user's heading is printed followed by their lines.

    Most events are filed under the user found by walking up from the
    record's object_owner_uuid (see getowner). User-creation and file
    download/upload events are filed under the record's object_uuid instead.

    Args:
        arguments: list of command line argument strings; when None,
            sys.argv[1:] is used.
    """
    if arguments is None:
        arguments = sys.argv[1:]

    args, since, to = parse_arguments(arguments)

    arv = arvados.api()

    print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
                                                       since.isoformat(sep=" ", timespec="minutes"),
                                                       to.isoformat(sep=" ", timespec="minutes")))

    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])

    # users: uuid (or None) -> list of report lines, in the order events arrive.
    users = {}
    # owners: cache handed to getowner() so each uuid is resolved only once.
    owners = {}

    for e in events:
        owner = getowner(arv, e["object_owner_uuid"], owners)
        users.setdefault(owner, [])
        # Timestamps are shown in the local timezone, to minute precision.
        event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
        # The log record's own uuid is currently not shown; the placeholder is
        # left empty, so lines that include it end with a trailing space.
        # loguuid = e["uuid"]
        loguuid = ""

        # Events are classified by event_type plus the type infix of the
        # object's uuid (characters 6-10). Branch order matters: the first
        # matching branch wins and the final else is the catch-all.
        if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
            # Filed under the newly created account itself, not under its owner.
            users.setdefault(e["object_uuid"], [])
            users[e["object_uuid"]].append("%s User account created" % event_at)

        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
            # Deliberately not reported.
            pass

        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
            # Only requests with no requesting container are reported.
            if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
                users[owner].append("%s Ran container %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))

        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
            # Deliberately not reported.
            pass

        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
            users[owner].append("%s Created project %s" %  (event_at, getname(e["properties"]["new_attributes"])))

        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
            users[owner].append("%s Deleted project %s" % (event_at, getname(e["properties"]["old_attributes"])))

        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
            users[owner].append("%s Updated project %s" % (event_at, getname(e["properties"]["new_attributes"])))

        elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
            # Reported as "Account activity". If the owner's most recent line
            # is also an activity line, that line is re-parsed to recover its
            # start (fields 0-1) and end (fields 3-4) timestamps so that
            # nearby activity can be folded into one "start to end" span.
            since_last = None
            if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
                sp = users[owner][-1].split(" ")
                start = sp[0]+" "+sp[1]
                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[3]+" "+sp[4])
                span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)

            # Less than 61 minutes since the previous span ended: extend that
            # span in place. Otherwise start a new zero-length span.
            if since_last is not None and since_last < datetime.timedelta(minutes=61):
                users[owner][-1] = "%s to %s (%02d:%02d) Account activity" % (start, event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))
            else:
                users[owner].append("%s to %s (0:00) Account activity" % (event_at, event_at))

        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
            # Links: tags and permissions get a friendly message, any other
            # link_class falls back to the generic line.
            if e["properties"]["new_attributes"]["link_class"] == "tag":
                users[owner].append("%s Tagged %s" % (event_at, e["properties"]["new_attributes"]["head_uuid"]))
            elif e["properties"]["new_attributes"]["link_class"] == "permission":
                users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
            else:
                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))

        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
            # Mirror of the link-creation branch, reading old_attributes.
            if e["properties"]["old_attributes"]["link_class"] == "tag":
                users[owner].append("%s Untagged %s" % (event_at, e["properties"]["old_attributes"]["head_uuid"]))
            elif e["properties"]["old_attributes"]["link_class"] == "permission":
                users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
            else:
                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))

        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
            # Collections whose "type" property is log, output or intermediate
            # are not reported.
            if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                pass
            else:
                users[owner].append("%s Created collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))

        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
            users[owner].append("%s Updated collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))

        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
            # Same filter as for creation, applied to the deleted record.
            if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                pass
            else:
                users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))

        elif e["event_type"] == "file_download":
                # Filed under object_uuid rather than the resolved owner. The
                # path comes from collection_file_path, falling back to reqPath.
                users.setdefault(e["object_uuid"], [])
                users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
                                                                                       e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                       e["properties"].get("collection_uuid"),
                                                                                       e["properties"].get("portable_data_hash")))

        elif e["event_type"] == "file_upload":
                # Same as downloads, but the portable data hash is not shown.
                users.setdefault(e["object_uuid"], [])
                users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
                                                                                    e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                    e["properties"].get("collection_uuid")))

        else:
            # Anything unrecognised gets a generic line.
            users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))

    for k,v in users.items():
        # Skip events with no resolvable owner, and the uuid ending in
        # -tpzed-000000000000000 (presumably the system user -- confirm).
        if k is None or k.endswith("-tpzed-000000000000000"):
            continue
        print(getuserinfo(arv, k))
        for ev in v:
            print("  %s" % ev)
        print("")
227
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()