14865: Reverts change to read op and fixes StringFile
[arvados.git] / tools / keep-xref / keep-xref.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6 #
7
8 from __future__ import print_function, absolute_import
9 import argparse
10 import arvados
11 import arvados.util
12 import csv
13 import sys
14 import logging
15
# Progress messages are logged at a custom numeric level one step above
# INFO.  The root logger is configured with that same level as its
# threshold, so records at INFO or below that reach the root logger are
# filtered out while this script's own progress lines (logged at lglvl)
# are emitted.  Only the message text is printed.
lglvl = logging.INFO + 1
logging.basicConfig(format='%(message)s', level=lglvl)
18
19 """
20  Given a list of collections missing blocks (as produced by
21 keep-balance), produce a report listing affected collections and
22 container requests.
23 """
24
def rerun_request(arv, container_requests_to_rerun, ct):
    """Collect the top-level container requests behind container ``ct``.

    Lists every container request whose ``container_uuid`` equals
    ``ct["uuid"]``.  A request with a non-empty
    ``requesting_container_uuid`` refers to another container: that
    container is fetched and examined recursively.  A request without
    one is stored in ``container_requests_to_rerun`` under its own uuid.

    arv -- Arvados API client used for the lookups.
    container_requests_to_rerun -- dict mapping container request uuid to
        the container request record; updated in place.
    ct -- container record; only its "uuid" field is read here.
    """
    matching = arvados.util.list_all(
        arv.container_requests().list,
        filters=[["container_uuid", "=", ct["uuid"]]])
    for req in matching:
        parent_uuid = req["requesting_container_uuid"]
        if not parent_uuid:
            # No requesting container: keep this request itself.
            container_requests_to_rerun[req["uuid"]] = req
            continue
        # Walk up to the container that issued this request.
        parent = arv.containers().get(uuid=parent_uuid).execute()
        rerun_request(arv, container_requests_to_rerun, parent)
32
def get_owner(arv, owners, record):
    """Return ``(owner name, root owner uuid)`` for the owner of ``record``.

    ``owners`` is a cache keyed by owner uuid; a hit is returned without
    any API call.  On a miss the owner is looked up:

    * if characters 6-10 of the uuid are "tpzed" the owner is fetched
      through the users API and the entry is ``(full_name, uuid)``;
    * otherwise it is fetched through the groups API, its own owner is
      resolved recursively, and the entry is
      ``(group name, root owner uuid of the group's owner)``.

    arv -- Arvados API client.
    owners -- dict cache, updated in place with every owner resolved.
    record -- any record with an "owner_uuid" field.
    """
    owner_uuid = record["owner_uuid"]
    if owner_uuid in owners:
        return owners[owner_uuid]

    if owner_uuid[6:11] == "tpzed":
        user = arv.users().get(uuid=owner_uuid).execute()
        entry = (user["full_name"], owner_uuid)
    else:
        group = arv.groups().get(uuid=owner_uuid).execute()
        # Only the root owner uuid of the parent chain is needed here.
        root_uuid = get_owner(arv, owners, group)[1]
        entry = (group["name"], root_uuid)

    owners[owner_uuid] = entry
    return entry
43
def main():
    """Write a CSV report of records affected by missing blocks.

    Reads the file named by the positional ``inp`` argument.  Each line
    is split on single spaces; the first field (the block id) is ignored
    and every remaining field is taken as the portable data hash of an
    affected collection.

    For each distinct portable data hash, one CSV row is written to
    stdout for every collection with that hash, and the containers whose
    ``output`` equals that hash are traced back to their top-level
    container requests.  After all hashes are processed, one row is
    written per container request found.  Progress is logged every 100
    items and once at the end of each phase.
    """
    # NOTE(review): the description mentions re-running containers, but this
    # function only writes a report; left unchanged as user-visible text.
    parser = argparse.ArgumentParser(description='Re-run containers associated with missing blocks')
    parser.add_argument('inp')
    args = parser.parse_args()

    arv = arvados.api('v1')

    busted_collections = set()

    logging.log(lglvl, "Reading %s", args.inp)

    # Get the list of bad collection PDHs.  The context manager makes sure
    # the input file is closed even if reading fails part-way.
    with open(args.inp, "rt") as blocksfile:
        for line in blocksfile:
            # Ignore the first item, that's the block id.  Empty tokens
            # (from doubled spaces) are dropped so they are not queried
            # as if they were a PDH.
            fields = line.rstrip().split(" ")[1:]
            busted_collections.update(pdh for pdh in fields if pdh)

    out = csv.writer(sys.stdout)

    out.writerow(("collection uuid", "container request uuid", "record name", "modified at", "owner uuid", "owner name", "root owner uuid", "root owner name", "notes"))

    logging.log(lglvl, "Finding collections")

    # Cache of owner uuid -> (owner name, root owner uuid), filled by get_owner.
    owners = {}
    container_requests_to_rerun = {}
    total_collections = len(busted_collections)
    # Get containers that produced these collections
    for i, pdh in enumerate(busted_collections):
        if (i % 100) == 0:
            logging.log(lglvl, "%d/%d", i, total_collections)

        affected = arvados.util.list_all(arv.collections().list, filters=[["portable_data_hash", "=", pdh]])
        for coll in affected:
            note = ""
            coll_type = coll["properties"].get("type")
            if coll_type not in ("output", "log"):
                note = "\"type\" was '%s', expected one of 'output' or 'log'" % coll_type
            ou = get_owner(arv, owners, coll)
            # get_owner has cached the root owner too, so its name can be
            # read straight from the owners cache.
            out.writerow((coll["uuid"], "", coll["name"], coll["modified_at"], coll["owner_uuid"], ou[0], ou[1], owners[ou[1]][0], note))

        maybe_containers_to_rerun = arvados.util.list_all(arv.containers().list, filters=[["output", "=", pdh]])
        for ct in maybe_containers_to_rerun:
            rerun_request(arv, container_requests_to_rerun, ct)

    logging.log(lglvl, "%d/%d", total_collections, total_collections)
    logging.log(lglvl, "Finding container requests")

    total_requests = len(container_requests_to_rerun)
    for i, cr in enumerate(container_requests_to_rerun.values()):
        if (i % 100) == 0:
            logging.log(lglvl, "%d/%d", i, total_requests)
        ou = get_owner(arv, owners, cr)
        out.writerow(("", cr["uuid"], cr["name"], cr["modified_at"], cr["owner_uuid"], ou[0], ou[1], owners[ou[1]][0], ""))

    logging.log(lglvl, "%d/%d", total_requests, total_requests)
102
# Run the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()