a497dc8c33ed1ce1ec5123e424eaf6a606265308
[arvados.git] / tools / rerun / keep-rerun.py
1 #!/usr/bin/env python
2 #
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: AGPL-3.0
6 #
7
8 from __future__ import print_function, absolute_import
9 import argparse
10 import arvados
11 import arvados.util
12 import csv
13 import sys
14
15 """
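Given a list of collections missing blocks (as produced by
keep-balance), print a CSV report of the affected collections and of the
container requests that would need to be re-run to regenerate them.
This script only reports; it does not delete or re-run anything itself.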
18 """
19
def rerun_request(arv, container_requests_to_rerun, ct):
    """Record the container requests at the top of the chain above *ct*.

    Lists every container request whose container_uuid equals ct's uuid.
    A request that has a requesting_container_uuid is followed upward:
    that container is fetched and this function recurses on it.  A
    request without one is stored in container_requests_to_rerun, keyed
    by its uuid.

    arv -- API client object used for the list and get calls
    container_requests_to_rerun -- dict updated in place (uuid -> request)
    ct -- container record; only its "uuid" field is read here
    """
    matching = arvados.util.list_all(
        arv.container_requests().list,
        filters=[["container_uuid", "=", ct["uuid"]]])
    for request in matching:
        parent_uuid = request["requesting_container_uuid"]
        if not parent_uuid:
            # No requesting container: this is where the climb stops.
            container_requests_to_rerun[request["uuid"]] = request
            continue
        # Submitted from inside another container; keep climbing.
        parent = arv.containers().get(uuid=parent_uuid).execute()
        rerun_request(arv, container_requests_to_rerun, parent)
27
def get_owner(arv, owners, uuid):
    """Return the display name for owner *uuid*, caching it in *owners*.

    If uuid is already a key of owners, the stored name is returned and
    arv is not touched.  Otherwise the name is looked up through arv:
    when characters 6-10 of the uuid are "tpzed" the users endpoint is
    queried and "full_name" is used; for anything else the groups
    endpoint is queried and "name" is used.  The result is stored in
    owners before being returned.

    arv -- API client object providing users() and groups()
    owners -- dict used as a uuid -> name cache, updated in place
    uuid -- owner uuid string
    """
    if uuid in owners:
        return owners[uuid]

    if uuid[6:11] == "tpzed":
        record = arv.users().get(uuid=uuid).execute()
        display_name = record["full_name"]
    else:
        record = arv.groups().get(uuid=uuid).execute()
        display_name = record["name"]

    owners[uuid] = display_name
    return owners[uuid]
35
def _read_busted_pdhs(path):
    """Return the set of collection PDHs listed in the report file at *path*.

    Each line is expected to hold a block id followed by the portable
    data hashes of the collections that reference that block.
    """
    pdhs = set()
    # The context manager closes the file even if reading fails part-way.
    with open(path, "rt") as blocksfile:
        for line in blocksfile:
            # Ignore the first item, that's the block id.  split() with no
            # separator also drops the empty tokens that blank lines or
            # repeated spaces would otherwise turn into bogus PDHs.
            pdhs.update(line.split()[1:])
    return pdhs


def main():
    """Print a CSV report of broken collections and requests to re-run.

    Reads the file named by the single positional command-line argument,
    collects the portable data hashes it lists, and writes to stdout:

    * a header row;
    * one row per collection record having one of those hashes, with a
      note when its "type" property is neither 'output' nor 'log';
    * one row per container request gathered by rerun_request() for the
      containers whose output is one of those hashes.

    Nothing is deleted or re-run here; the function only reports.
    """
    parser = argparse.ArgumentParser(description='Re-run containers associated with missing blocks')
    parser.add_argument('inp')
    args = parser.parse_args()

    arv = arvados.api('v1')

    # Get the list of bad collection PDHs
    busted_collections = _read_busted_pdhs(args.inp)

    out = csv.writer(sys.stdout)

    out.writerow(("collection uuid", "container request uuid", "record name", "modified at", "owner uuid", "owner name", "notes"))

    owners = {}  # uuid -> display name cache shared by every get_owner() call
    container_requests_to_rerun = {}
    # Get containers that produced these collections
    for pdh in busted_collections:
        collections_to_delete = arvados.util.list_all(arv.collections().list, filters=[["portable_data_hash", "=", pdh]])
        for d in collections_to_delete:
            note = ""
            collection_type = d["properties"].get("type")
            if collection_type not in ("output", "log"):
                note = "\"type\" was '%s', expected one of 'output' or 'log'" % collection_type
            out.writerow((d["uuid"], "", d["name"], d["modified_at"], d["owner_uuid"], get_owner(arv, owners, d["owner_uuid"]), note))

        maybe_containers_to_rerun = arvados.util.list_all(arv.containers().list, filters=[["output", "=", pdh]])
        for ct in maybe_containers_to_rerun:
            rerun_request(arv, container_requests_to_rerun, ct)

    # Request rows are written only after every PDH has been processed, so
    # a request reached from several containers is reported once.
    for cr in container_requests_to_rerun.values():
        out.writerow(("", cr["uuid"], cr["name"], cr["modified_at"], cr["owner_uuid"], get_owner(arv, owners, cr["owner_uuid"]), ""))
79
80
# Run the report only when this file is executed as a script, so that
# importing the module does not parse arguments or contact the API.
if __name__ == "__main__":
    main()