15148: Include list of affected PDHs in LostBlocksFile.
[arvados.git] / sdk / python / arvados / commands / migrate19.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import print_function
6 from __future__ import division
7 import argparse
8 import time
9 import sys
10 import logging
11 import shutil
12 import tempfile
13 import os
14 import subprocess
15 import re
16
17 import arvados
18 import arvados.commands.keepdocker
19 from arvados._version import __version__
20 from arvados.collection import CollectionReader
21
# Module-level logger for the Docker 1.9 -> 1.10 migration tool.
# ARVADOS_DEBUG in the environment/config turns on debug-level output.
logger = logging.getLogger('arvados.migrate-docker19')
if arvados.config.get('ARVADOS_DEBUG'):
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)

# Link class and name used to record (and look up) that a v1 image has
# already been migrated to v2.
_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'
28
class MigrationFailed(Exception):
    """Raised when the in-container migration script exits with a nonzero status."""
31
def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')
    migrate19_parser.add_argument(
        '--verbose', action="store_true", help="Print stdout/stderr even on success")
    migrate19_parser.add_argument(
        '--force', action="store_true", help="Try to migrate even if there isn't enough space")

    migrate19_parser.add_argument(
        '--storage-driver', type=str, default="overlay",
        help="Docker storage driver, e.g. aufs, overlay, vfs")

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Optional allowlist of portable data hashes (one per line of infile);
    # when present, only those collections are considered for migration.
    only_migrate = None
    if args.infile:
        only_migrate = set()
        for l in args.infile:
            only_migrate.add(l.strip())

    api_client = arvados.api()

    # Migration links are created under the system user, so an admin token
    # is required up front.
    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    # Derive the cluster's system user uuid by keeping the 12-character
    # cluster/object-type prefix of the current user's uuid and appending
    # the all-zeros identifier.
    sys_uuid = user['uuid'][:12] + '000000000000000'

    # (uuid, image-metadata) pairs for all Docker images stored in Arvados.
    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    # v2 images are identified by a "sha256:" content hash; anything else
    # is a v1 image and a candidate for migration.
    old_images = []
    for uuid, img in images:
        if img["dockerhash"].startswith("sha256:"):
            continue
        old_images.append(img)

    # Existing migration links tell us which v1 images (by portable data
    # hash, recorded as the link's tail_uuid) were already converted.
    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

    # Fetch the collections backing the old images so their manifests,
    # portable data hashes and owners are available locally.
    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    # Compute the set of images still needing migration (keyed by portable
    # data hash so duplicates collapse), the total bytes involved, and the
    # single biggest image -- used for the free-space estimate below.
    need_migrate = {}
    totalbytes = 0
    biggest = 0
    biggest_pdh = None
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                # Each image collection holds a single tar file.
                size = list(c.values())[0].size()
                if size > biggest:
                    biggest = size
                    biggest_pdh = pdh
                totalbytes += size

    # Rough scratch-space requirement for migrating the largest image.
    # vfs keeps a full copy of every layer, so it needs far more headroom
    # than copy-on-write drivers like overlay or aufs.
    if args.storage_driver == "vfs":
        will_need = (biggest*20)
    else:
        will_need = (biggest*2.5)

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
    logger.info("Total data to migrate about %i MiB", totalbytes>>20)

    # Check free space on the temp filesystem with df(1); the second output
    # line describes the filesystem holding tempdir.
    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
    ln = df_out.splitlines()[1]
    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups()
    if int(available) <= will_need:
        logger.warning("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
        if not args.force:
            exit(1)
        else:
            logger.warning("--force provided, will migrate anyway")

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in list(need_migrate.values()):
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        # The image collection contains exactly one tar file; its name
        # (truncated to 40 chars below) doubles as the v1 image hash.
        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        tarfile = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
        count += 1
        start = time.time()

        # Scratch directories bind-mounted into the migration container;
        # always removed in the finally block below.
        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
            with tempfile.NamedTemporaryFile() as envfile:
                # Pass Arvados credentials into the container via --env-file
                # so they never appear on the docker command line.
                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
                envfile.flush()

                # Run the actual conversion inside the privileged
                # arvados/migrate-docker19 helper container.
                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19:1.0",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
                             args.storage_driver]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

                # migrate.sh reports free space at each stage; scrape the
                # numbers so per-stage usage can be logged.
                initial_space = re.search(r"Initial available space is (\d+)", out)
                imgload_space = re.search(r"Available space after image load is (\d+)", out)
                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)

                if initial_space:
                    isp = int(initial_space.group(1))
                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
                    if imgload_space:
                        sp = int(imgload_space.group(1))
                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
                    if imgupgrade_space:
                        sp = int(imgupgrade_space.group(1))
                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
                    if keepdocker_space:
                        sp = int(keepdocker_space.group(1))
                        logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))

                if cleanup_space:
                    sp = int(cleanup_space.group(1))
                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

                if args.verbose:
                    logger.info("--- Stdout ---\n%s", out)
                    logger.info("--- Stderr ---\n%s", err)

            # On success migrate.sh prints the uuid of the new (v2) image
            # collection; record a migration link from old pdh to new pdh.
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            # Log with traceback unless the failure was already reported
            # above (MigrationFailed), then continue with the next image.
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))