1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import print_function
6 from __future__ import division
18 import arvados.commands.keepdocker
19 from arvados._version import __version__
20 from arvados.collection import CollectionReader
# Module-level logger for this command.  The level expression picks DEBUG when
# arvados.config.get('ARVADOS_DEBUG') is truthy; the alternative level is on a
# continuation line that is not visible in this listing.
23 logger = logging.getLogger('arvados.migrate-docker19')
24 logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
# link_class / name pair identifying migration links.  main() uses the same
# pair both to filter existing links and to create new ones.
27 _migration_link_class = 'docker_image_migration'
28 _migration_link_name = 'migrate_1.9_1.10'
# Raised by main() when the migration container exits with a non-zero return
# code.  main()'s exception handler logs a traceback for every exception type
# except this one.  (The class body is not visible in this listing.)
30 class MigrationFailed(Exception):
# Command-line entry point for arv-migrate-docker19; wrapped by
# util._deprecated('3.0').
#
# arguments: argument list handed to argparse's parse_args(); the default of
#     None is passed through unchanged.
#
# Flow, as far as this listing shows: parse options, require an admin token,
# find old-format Docker images that have no migration link yet, check scratch
# space, then run the arvados/migrate-docker19:1.0 container once per image
# and record a migration link for each success.
#
# NOTE(review): the embedded original line numbers skip in places, so some
# statements (guards, else/try/finally lines, closing brackets) are not
# visible in this listing; comments below hedge wherever they depend on them.
33 @util._deprecated('3.0')
34 def main(arguments=None):
35 """Docker image format migration tool for Arvados.
37 This converts Docker images stored in Arvados from image format v1
38 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
40 Requires Docker running on the local host.
44 1) Run arvados/docker/migrate-docker19/build.sh to create
45 arvados/migrate-docker19 Docker image.
47 2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
49 3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
51 This will query Arvados for v1 format Docker images. For each image that
52 does not already have a corresponding v2 format image (as indicated by a
53 docker_image_migration tag) it will perform the following process:
55 i) download the image from Arvados
56 ii) load it into Docker
57 iii) update the Docker version, which updates the image
58 iv) save the v2 format image and upload to Arvados
59 v) create a migration link
# Command-line options.
63 migrate19_parser = argparse.ArgumentParser()
64 migrate19_parser.add_argument(
65 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
66 help='Print version and exit.')
67 migrate19_parser.add_argument(
68 '--verbose', action="store_true", help="Print stdout/stderr even on success")
69 migrate19_parser.add_argument(
70 '--force', action="store_true", help="Try to migrate even if there isn't enough space")
72 migrate19_parser.add_argument(
73 '--storage-driver', type=str, default="overlay",
74 help="Docker storage driver, e.g. aufs, overlay, vfs")
# Mutually exclusive group; presumably --dry-run and --print-unmigrated below
# are registered on it (the registering calls are not visible) -- confirm.
76 exgroup = migrate19_parser.add_mutually_exclusive_group()
78 '--dry-run', action='store_true', help="Print number of pending migrations.")
80 '--print-unmigrated', action='store_true',
81 default=False, help="Print list of images needing migration.")
83 migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
# Optional positional file listing the images to migrate.
85 migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
86 default=None, help="List of images to be migrated")
88 args = migrate19_parser.parse_args(arguments)
# Make the tempfile module use --tempdir as its default directory (presumably
# only when the option was supplied; the guard is not visible here).
91 tempfile.tempdir = args.tempdir
# Switch this module's logger to DEBUG (condition not visible -- TODO confirm).
94 logger.setLevel(logging.DEBUG)
# Add a stripped line to only_migrate (presumably one line of infile per
# iteration).  Further down, its entries are compared against collection
# portable data hashes.
100 only_migrate.add(l.strip())
# Connect to the API server and require an admin user.
102 api_client = arvados.api()
104 user = api_client.users().current().execute()
105 if not user['is_admin']:
106 raise Exception("This command requires an admin token")
# sys_uuid: first 12 characters of the current user's uuid plus a zero suffix;
# it is used as owner_uuid of the migration links created below.
107 sys_uuid = user['uuid'][:12] + '000000000000000'
# List the Docker images stored in Arvados.
# NOTE(review): the literal 3 is presumably a retry count -- confirm against
# list_images_in_arv.
109 images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
# A dockerhash starting with "sha256:" is what is_new treats as a new image.
111 is_new = lambda img: img['dockerhash'].startswith('sha256:')
# Walk the listing; presumably "sha256:" images are skipped or counted (branch
# body not visible) and every other image is appended to old_images.
115 for uuid, img in images:
116 if img["dockerhash"].startswith("sha256:"):
118 key = (img["repo"], img["tag"], img["timestamp"])
119 old_images.append(img)
# Existing migration links.  Each tail_uuid marks a source that was already
# migrated; the links created below store the old collection's portable data
# hash in that field.
121 migration_links = arvados.util.list_all(api_client.links().list, filters=[
122 ['link_class', '=', _migration_link_class],
123 ['name', '=', _migration_link_name],
126 already_migrated = set()
127 for m in migration_links:
128 already_migrated.add(m["tail_uuid"])
# Fetch the collection records backing the old images and index them by uuid.
130 items = arvados.util.list_all(api_client.collections().list,
131 filters=[["uuid", "in", [img["collection"] for img in old_images]]],
132 select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
133 uuid_to_collection = {i["uuid"]: i for i in items}
# Select images to migrate: not already migrated, not already selected under
# the same portable data hash, and present in only_migrate when a list was
# given.
139 for img in old_images:
140 i = uuid_to_collection[img["collection"]]
141 pdh = i["portable_data_hash"]
142 if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
143 need_migrate[pdh] = img
# The image size is read from the first file in the collection.
144 with CollectionReader(i["manifest_text"]) as c:
145 size = list(c.values())[0].size()
# Scratch-space estimate derived from the biggest image: 20x with the vfs
# storage driver, 2.5x presumably in the else branch (not visible).
152 if args.storage_driver == "vfs":
153 will_need = (biggest*20)
155 will_need = (biggest*2.5)
# --print-unmigrated: iterate over the pending portable data hashes (loop body
# not visible).
157 if args.print_unmigrated:
159 for pdh in need_migrate:
# Summary; ">>20" converts byte counts to MiB.
163 logger.info("Already migrated %i images", len(already_migrated))
164 logger.info("Need to migrate %i images", len(need_migrate))
165 logger.info("Using tempdir %s", tempfile.gettempdir())
166 logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
167 logger.info("Total data to migrate about %i MiB", totalbytes>>20)
# Free space on the temp filesystem: the second line of "df -B1" output is
# split into six space-separated fields.
# NOTE(review): on Python 3 check_output() returns bytes unless text mode is
# requested, while the pattern below is a str -- confirm this works on the
# interpreter in use.
169 df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
170 ln = df_out.splitlines()[1]
171 filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
# Warn when the available space does not exceed the estimate.
# NOTE(review): Logger.warn is a deprecated alias of Logger.warning.
172 if int(available) <= will_need:
173 logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
# With --force the run continues despite the warning (guard not visible).
177 logger.warn("--force provided, will migrate anyway")
# Migrate the selected images one at a time.
185 for old_image in list(need_migrate.values()):
# Re-check against already_migrated, which grows as this loop records
# successes.
186 if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
# Open the old collection; its first entry is used as the image tar file name.
189 oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
190 tarfile = list(oldcol.keys())[0]
192 logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
193 old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
# Scratch directories, bind-mounted into the container below as
# /var/lib/docker and /root/.cache/arvados/docker.
197 varlibdocker = tempfile.mkdtemp()
198 dockercache = tempfile.mkdtemp()
# API settings are handed to the container through a temporary --env-file.
# NOTE(review): NamedTemporaryFile() opens in binary mode by default, so
# writing str values needs confirming on Python 3.
200 with tempfile.NamedTemporaryFile() as envfile:
201 envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
202 envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
203 if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
204 envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
# docker run command line (some of its arguments are not visible in this
# listing).
207 dockercmd = ["docker", "run",
210 "--env-file", envfile.name,
211 "--volume", "%s:/var/lib/docker" % varlibdocker,
212 "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
213 "arvados/migrate-docker19:1.0",
215 "%s/%s" % (old_image["collection"], tarfile),
219 uuid_to_collection[old_image["collection"]]["owner_uuid"],
# Run the container and capture its stdout and stderr.
222 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
223 out, err = proc.communicate()
# Pull the free-space figures that the container reports on stdout.
# NOTE(review): out is bytes unless text mode was requested in Popen, while
# these patterns are str -- confirm on the interpreter in use.
225 initial_space = re.search(r"Initial available space is (\d+)", out)
226 imgload_space = re.search(r"Available space after image load is (\d+)", out)
227 imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
228 keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
229 cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
# Log space usage relative to the initial figure (presumably each step is
# guarded by a check that its pattern matched; guards not visible).  "/" is
# true division here, so %i truncates the resulting float.
232 isp = int(initial_space.group(1))
233 logger.info("Available space initially: %i MiB", (isp)/(2**20))
235 sp = int(imgload_space.group(1))
236 logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
238 sp = int(imgupgrade_space.group(1))
239 logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
241 sp = int(keepdocker_space.group(1))
242 logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
245 sp = int(cleanup_space.group(1))
246 logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
# Non-zero exit status: log both streams and abort this image.
248 if proc.returncode != 0:
249 logger.error("Failed with return code %i", proc.returncode)
250 logger.error("--- Stdout ---\n%s", out)
251 logger.error("--- Stderr ---\n%s", err)
252 raise MigrationFailed()
# Log both streams on success as well (presumably only with --verbose, per
# that option's help text; guard not visible).
255 logger.info("--- Stdout ---\n%s", out)
256 logger.info("--- Stderr ---\n%s", err)
# The container prints the uuid of the migrated collection.
258 migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
260 newcol = CollectionReader(migrated.group(1))
# Record the migration: tail_uuid is the old collection's portable data hash,
# head_uuid the new collection's.
262 api_client.links().create(body={"link": {
263 'owner_uuid': sys_uuid,
264 'link_class': _migration_link_class,
265 'name': _migration_link_name,
266 'tail_uuid': oldcol.portable_data_hash(),
267 'head_uuid': newcol.portable_data_hash()
268 }}).execute(num_retries=3)
270 logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
271 oldcol.portable_data_hash(), old_image["collection"],
272 newcol.portable_data_hash(), migrated.group(1),
# Remember the success so later iterations skip the same portable data hash.
274 already_migrated.add(oldcol.portable_data_hash())
275 success.append(old_image["collection"])
# Failure path, presumably taken when no "Migrated uuid" line was found
# (condition not visible).
277 logger.error("Error migrating '%s'", old_image["collection"])
278 failures.append(old_image["collection"])
# Any exception marks the image as failed; the traceback is logged unless the
# exception is a MigrationFailed.
279 except Exception as e:
280 logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
281 exc_info=(not isinstance(e, MigrationFailed)))
282 failures.append(old_image["collection"])
# Remove the scratch directories (presumably in a finally clause; not
# visible).
284 shutil.rmtree(varlibdocker)
285 shutil.rmtree(dockercache)
# Final tally.
287 logger.info("Successfully migrated %i images", len(success))
289 logger.error("Failed to migrate %i images", len(failures))