Merge branch '16417-installer-loki'
[arvados.git] / tools / keep-xref / keep-xref.py
1 #!/usr/bin/env python3
2 #
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6 #
7
8 import argparse
9 import arvados
10 import arvados.util
11 import csv
12 import sys
13 import logging
14
# Custom log level one step above INFO.  The root logger threshold is set to
# this level, so messages emitted with logging.log(lglvl, ...) are shown
# while ordinary INFO (and lower) records are filtered out.
lglvl = 1 + logging.INFO
logging.basicConfig(format='%(message)s', level=lglvl)
17
18 """
19  Given a list of collections missing blocks (as produced by
20 keep-balance), produce a report listing affected collections and
21 container requests.
22 """
23
def rerun_request(arv, container_requests_to_rerun, ct):
    """Record the top-level container requests behind container `ct`.

    Every container request whose "container_uuid" equals ``ct["uuid"]`` is
    listed.  A request that names a "requesting_container_uuid" is followed
    upwards: that container is fetched and examined the same way.  A request
    without one is stored in `container_requests_to_rerun`.

    Arguments:
      arv -- API client object providing container_requests() and containers().
      container_requests_to_rerun -- dict, updated in place, mapping container
        request uuid to the container request record.
      ct -- container record; only its "uuid" field is read.
    """
    for req in arvados.util.keyset_list_all(
            arv.container_requests().list,
            filters=[["container_uuid", "=", ct["uuid"]]],
            order='uuid'):
        parent_uuid = req["requesting_container_uuid"]
        if not parent_uuid:
            # Nothing requested this one: it is the root of the chain.
            container_requests_to_rerun[req["uuid"]] = req
            continue
        # Climb one level to the container that submitted this request.
        parent = arv.containers().get(uuid=parent_uuid).execute()
        rerun_request(arv, container_requests_to_rerun, parent)
34
def get_owner(arv, owners, record):
    """Return ``(owner name, root owner uuid)`` for the owner of `record`.

    `owners` is a cache, updated in place, mapping an owner uuid to that
    tuple.  On a cache miss the owner is fetched through `arv`: a uuid whose
    characters 6-10 are "tpzed" is looked up with users() and is its own
    root owner; any other uuid is looked up with groups(), and its root
    owner is found by resolving the group's own owner recursively.

    Arguments:
      arv -- API client object providing users() and groups().
      owners -- dict cache of uuid -> (name, root owner uuid).
      record -- any record with an "owner_uuid" field.
    """
    owner_uuid = record["owner_uuid"]
    if owner_uuid in owners:
        return owners[owner_uuid]

    if owner_uuid[6:11] == "tpzed":
        user = arv.users().get(uuid=owner_uuid).execute()
        entry = (user["full_name"], owner_uuid)
    else:
        group = arv.groups().get(uuid=owner_uuid).execute()
        # Walk up the ownership chain; only the root uuid is needed here.
        _, root_uuid = get_owner(arv, owners, group)
        entry = (group["name"], root_uuid)

    owners[owner_uuid] = entry
    return entry
45
def _read_busted_collections(path):
    """Return the set of collection PDHs listed in the report file at `path`.

    Each line is split on whitespace; the first field (the block id) is
    skipped and every remaining field is taken as a collection portable data
    hash.  Blank lines and repeated separators contribute nothing.
    """
    pdhs = set()
    # The context manager closes the file even if reading fails part-way.
    with open(path, "rt") as blocksfile:
        for line in blocksfile:
            pdhs.update(line.split()[1:])
    return pdhs


def main():
    """Write a CSV report of records affected by missing blocks to stdout.

    Reads the file named on the command line, then for every collection PDH
    found in it:
      * writes one row per collection with that portable data hash, with a
        note when the collection's "type" property is not 'output' or 'log';
      * finds the containers whose output is that PDH and traces each back
        to its top-level container requests.
    Finally one row is written per top-level container request found.
    Progress messages are logged at level `lglvl`.
    """
    # NOTE(review): this script only reports; it does not re-run anything itself.
    parser = argparse.ArgumentParser(description='Re-run containers associated with missing blocks')
    parser.add_argument(
        'inp',
        help='file listing missing blocks and the collections that use them, '
             'as produced by keep-balance')
    args = parser.parse_args()

    arv = arvados.api('v1')

    logging.log(lglvl, "Reading %s", args.inp)

    # Get the list of bad collection PDHs
    busted_collections = _read_busted_collections(args.inp)

    out = csv.writer(sys.stdout)

    out.writerow(("collection uuid", "container request uuid", "record name", "modified at", "owner uuid", "owner name", "root owner uuid", "root owner name", "notes"))

    logging.log(lglvl, "Finding collections")

    owners = {}
    container_requests_to_rerun = {}
    total = len(busted_collections)
    for i, pdh in enumerate(busted_collections):
        if i % 100 == 0:
            logging.log(lglvl, "%d/%d", i, total)

        # Report every collection record that has this content.
        affected = arvados.util.keyset_list_all(
            arv.collections().list,
            filters=[["portable_data_hash", "=", pdh]],
            order='uuid')
        for coll in affected:
            note = ""
            coll_type = coll["properties"].get("type")
            if coll_type not in ("output", "log"):
                note = "\"type\" was '%s', expected one of 'output' or 'log'" % coll_type
            ou = get_owner(arv, owners, coll)
            # get_owner() caches the root owner too, so its name can be read
            # straight from the cache.
            out.writerow((coll["uuid"], "", coll["name"], coll["modified_at"], coll["owner_uuid"], ou[0], ou[1], owners[ou[1]][0], note))

        # Get containers that produced these collections
        maybe_containers_to_rerun = arvados.util.keyset_list_all(
            arv.containers().list,
            filters=[["output", "=", pdh]],
            order='uuid')
        for ct in maybe_containers_to_rerun:
            rerun_request(arv, container_requests_to_rerun, ct)

    logging.log(lglvl, "%d/%d", total, total)
    logging.log(lglvl, "Finding container requests")

    total = len(container_requests_to_rerun)
    for i, cr in enumerate(container_requests_to_rerun.values()):
        if i % 100 == 0:
            logging.log(lglvl, "%d/%d", i, total)
        ou = get_owner(arv, owners, cr)
        out.writerow(("", cr["uuid"], cr["name"], cr["modified_at"], cr["owner_uuid"], ou[0], ou[1], owners[ou[1]][0], ""))

    logging.log(lglvl, "%d/%d", total, total)
104
# Script entry point: build the report only when run directly, not on import.
if __name__ == '__main__':
    main()