1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from __future__ import print_function
6 from __future__ import division
18 import arvados.commands.keepdocker
19 from arvados._version import __version__
20 from arvados.collection import CollectionReader
# Module-level logger for this tool, registered under the name
# 'arvados.migrate-docker19'.
22 logger = logging.getLogger('arvados.migrate-docker19')
# DEBUG is selected when the ARVADOS_DEBUG configuration value is truthy.
# NOTE(review): the rest of this conditional expression (the fallback level)
# is on a continuation line that is not visible here.
23 logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
# Link class and link name identifying a "migrated" link. main() uses the same
# pair both to look up existing migration links and to create new ones.
26 _migration_link_class = 'docker_image_migration'
27 _migration_link_name = 'migrate_1.9_1.10'
# Raised by main() when the migration container exits with a non-zero return
# code. main()'s exception handler checks for this type and skips the traceback
# for it (exc_info is only set for other exception types).
# NOTE(review): the class body is not visible here.
29 class MigrationFailed(Exception):
# Command-line entry point. `arguments` is handed to argparse's parse_args();
# the docstring below describes the overall migration procedure.
32 def main(arguments=None):
33 """Docker image format migration tool for Arvados.
35 This converts Docker images stored in Arvados from image format v1
36 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
38 Requires Docker running on the local host.
42 1) Run arvados/docker/migrate-docker19/build.sh to create
43 arvados/migrate-docker19 Docker image.
45 2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
47 3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
49 This will query Arvados for v1 format Docker images. For each image that
50 does not already have a corresponding v2 format image (as indicated by a
51 docker_image_migration tag) it will perform the following process:
53 i) download the image from Arvados
54 ii) load it into Docker
55 iii) update the Docker version, which updates the image
56 iv) save the v2 format image and upload to Arvados
57 v) create a migration link
# ---- Command-line options ----
61 migrate19_parser = argparse.ArgumentParser()
62 migrate19_parser.add_argument(
63 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
64 help='Print version and exit.')
65 migrate19_parser.add_argument(
66 '--verbose', action="store_true", help="Print stdout/stderr even on success")
67 migrate19_parser.add_argument(
68 '--force', action="store_true", help="Try to migrate even if there isn't enough space")
# The storage driver also drives the free-space estimate further down:
# "vfs" uses a 20x multiplier, everything else 2.5x.
70 migrate19_parser.add_argument(
71 '--storage-driver', type=str, default="overlay",
72 help="Docker storage driver, e.g. aufs, overlay, vfs")
# NOTE(review): --dry-run and --print-unmigrated presumably belong to this
# mutually exclusive group; the add_argument( openers for the next two option
# specs are not visible here -- confirm.
74 exgroup = migrate19_parser.add_mutually_exclusive_group()
76 '--dry-run', action='store_true', help="Print number of pending migrations.")
78 '--print-unmigrated', action='store_true',
79 default=False, help="Print list of images needing migration.")
81 migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
# Optional positional argument: a readable file listing the images to migrate.
83 migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
84 default=None, help="List of images to be migrated")
86 args = migrate19_parser.parse_args(arguments)
# Setting tempfile.tempdir redirects the later gettempdir(), mkdtemp() and
# NamedTemporaryFile() calls to the requested directory.
# NOTE(review): any guard on this assignment is not visible here.
89 tempfile.tempdir = args.tempdir
# NOTE(review): the condition guarding this level change is not visible here
# (presumably --verbose) -- confirm.
92 logger.setLevel(logging.DEBUG)
# Entries are stripped of surrounding whitespace before being added to the
# only_migrate filter, which is later compared against portable data hashes.
98 only_migrate.add(l.strip())
100 api_client = arvados.api()
# The tool refuses to run unless the current token belongs to an admin user.
102 user = api_client.users().current().execute()
103 if not user['is_admin']:
104 raise Exception("This command requires an admin token")
# Built from the first 12 characters of the current user's uuid plus a run of
# zeros; used below as owner_uuid of the migration links that get created.
105 sys_uuid = user['uuid'][:12] + '000000000000000'
# NOTE(review): the second argument (3) is presumably a retry count -- confirm
# against keepdocker.list_images_in_arv.
107 images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
# Predicate for images whose dockerhash carries a "sha256:" prefix. It is not
# referenced again in the visible code; the loop below repeats the test inline.
109 is_new = lambda img: img['dockerhash'].startswith('sha256:')
# Walk all images and collect candidates in old_images.
# NOTE(review): the body of the "sha256:" branch is not visible here;
# presumably those images are skipped as already being in the new format.
113 for uuid, img in images:
114 if img["dockerhash"].startswith("sha256:"):
116 key = (img["repo"], img["tag"], img["timestamp"])
117 old_images.append(img)
# Fetch every existing migration link (matching the module-level link class
# and name).
119 migration_links = arvados.util.list_all(api_client.links().list, filters=[
120 ['link_class', '=', _migration_link_class],
121 ['name', '=', _migration_link_name],
# tail_uuid of a migration link holds the old collection's portable data hash
# (see the link creation below), so this set is keyed by PDH.
124 already_migrated = set()
125 for m in migration_links:
126 already_migrated.add(m["tail_uuid"])
# Load the collection records backing the old images and index them by uuid.
128 items = arvados.util.list_all(api_client.collections().list,
129 filters=[["uuid", "in", [img["collection"] for img in old_images]]],
130 select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
131 uuid_to_collection = {i["uuid"]: i for i in items}
# Select images to migrate: PDH not yet migrated, not already queued, and --
# when an only_migrate filter exists -- listed in it. The image size is taken
# from the first file in the collection's manifest.
137 for img in old_images:
138 i = uuid_to_collection[img["collection"]]
139 pdh = i["portable_data_hash"]
140 if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
141 need_migrate[pdh] = img
142 with CollectionReader(i["manifest_text"]) as c:
143 size = list(c.values())[0].size()
# Estimate of temporary space required, as a multiple of the biggest image.
# NOTE(review): biggest*2.5 is a float, and the warning further down evaluates
# will_need>>20, which raises TypeError for a float -- confirm and consider
# int(biggest*2.5).
150 if args.storage_driver == "vfs":
151 will_need = (biggest*20)
153 will_need = (biggest*2.5)
# NOTE(review): the statements executed for --print-unmigrated (loop body and
# any early return) are not visible here.
155 if args.print_unmigrated:
157 for pdh in need_migrate:
# Summary of the pending work; ">>20" converts a byte count to MiB.
161 logger.info("Already migrated %i images", len(already_migrated))
162 logger.info("Need to migrate %i images", len(need_migrate))
163 logger.info("Using tempdir %s", tempfile.gettempdir())
164 logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
165 logger.info("Total data to migrate about %i MiB", totalbytes>>20)
# Ask df (sizes in 1-byte blocks) about the filesystem holding the temp
# directory; the second output line is the data row, split into six
# space-separated columns by the regex.
# NOTE(review): on Python 3 check_output() returns bytes, so matching it with a
# str pattern raises TypeError -- confirm the target interpreter or decode.
# The argument to groups(1) is only a default for unmatched groups; all six
# groups here are mandatory, so it has no effect.
167 df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
168 ln = df_out.splitlines()[1]
169 filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
# Warn when the available space does not exceed the estimate.
# NOTE(review): logger.warn is a deprecated alias of logger.warning. The
# branches deciding between aborting and honouring --force are not visible.
170 if int(available) <= will_need:
171 logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, will_need>>20)
175 logger.warn("--force provided, will migrate anyway")
# ---- Migration loop ----
# Iterates over a snapshot list of the queued images. The PDH is re-checked
# against already_migrated, which grows as migrations succeed.
# NOTE(review): the action taken when the check matches is not visible here
# (presumably the image is skipped).
183 for old_image in list(need_migrate.values()):
184 if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
# The first file name in the old collection is passed to the container below as
# "<collection uuid>/<file name>".
187 oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
188 tarfile = list(oldcol.keys())[0]
190 logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
191 old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
# Two scratch directories, mounted into the container as /var/lib/docker and
# /root/.cache/arvados/docker; both are removed again after the attempt.
195 varlibdocker = tempfile.mkdtemp()
196 dockercache = tempfile.mkdtemp()
# API credentials are handed to the container through a temporary --env-file.
# NOTE(review): NamedTemporaryFile() defaults to binary mode, so writing str
# fails on Python 3 -- confirm. Also confirm the file is flushed before docker
# reads it; no flush is visible here.
198 with tempfile.NamedTemporaryFile() as envfile:
199 envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
200 envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
201 if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
202 envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
# Command line for the arvados/migrate-docker19:1.0 container.
# NOTE(review): several list elements are not visible here.
205 dockercmd = ["docker", "run",
208 "--env-file", envfile.name,
209 "--volume", "%s:/var/lib/docker" % varlibdocker,
210 "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
211 "arvados/migrate-docker19:1.0",
213 "%s/%s" % (old_image["collection"], tarfile),
217 uuid_to_collection[old_image["collection"]]["owner_uuid"],
# Run the container to completion, capturing stdout and stderr.
220 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
221 out, err = proc.communicate()
# Look for the space-usage markers in the container's stdout.
# NOTE(review): `out` is bytes on Python 3, so these str patterns would raise
# TypeError there -- confirm.
223 initial_space = re.search(r"Initial available space is (\d+)", out)
224 imgload_space = re.search(r"Available space after image load is (\d+)", out)
225 imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
226 keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
227 cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
# Report space figures in MiB. With `from __future__ import division` the "/"
# is true division and %i truncates the result for display.
# NOTE(review): the guards for markers that did not match are not visible here.
230 isp = int(initial_space.group(1))
231 logger.info("Available space initially: %i MiB", (isp)/(2**20))
233 sp = int(imgload_space.group(1))
234 logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
236 sp = int(imgupgrade_space.group(1))
237 logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
239 sp = int(keepdocker_space.group(1))
240 logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
243 sp = int(cleanup_space.group(1))
244 logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
# Non-zero exit: dump the captured output and signal failure with
# MigrationFailed, which the handler below logs without a traceback.
246 if proc.returncode != 0:
247 logger.error("Failed with return code %i", proc.returncode)
248 logger.error("--- Stdout ---\n%s", out)
249 logger.error("--- Stderr ---\n%s", err)
250 raise MigrationFailed()
# NOTE(review): presumably only emitted with --verbose ("Print stdout/stderr
# even on success"); the guard is not visible here.
253 logger.info("--- Stdout ---\n%s", out)
254 logger.info("--- Stderr ---\n%s", err)
# The container reports the uuid of the new collection on stdout.
# NOTE(review): the branching on whether the marker was found is not visible.
256 migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
258 newcol = CollectionReader(migrated.group(1))
# Record the migration as a link from the old collection's PDH (tail) to the
# new collection's PDH (head), owned by sys_uuid.
260 api_client.links().create(body={"link": {
261 'owner_uuid': sys_uuid,
262 'link_class': _migration_link_class,
263 'name': _migration_link_name,
264 'tail_uuid': oldcol.portable_data_hash(),
265 'head_uuid': newcol.portable_data_hash()
266 }}).execute(num_retries=3)
268 logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
269 oldcol.portable_data_hash(), old_image["collection"],
270 newcol.portable_data_hash(), migrated.group(1),
# Adding to already_migrated lets the check at the top of the loop recognise
# later images with the same PDH.
272 already_migrated.add(oldcol.portable_data_hash())
273 success.append(old_image["collection"])
275 logger.error("Error migrating '%s'", old_image["collection"])
276 failures.append(old_image["collection"])
# Any exception marks this image as failed. A traceback is attached only for
# exceptions other than MigrationFailed.
277 except Exception as e:
278 logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
279 exc_info=(not isinstance(e, MigrationFailed)))
280 failures.append(old_image["collection"])
# Remove the per-image scratch directories.
# NOTE(review): presumably inside a finally clause; not visible here.
282 shutil.rmtree(varlibdocker)
283 shutil.rmtree(dockercache)
# Final tally of successes and failures.
285 logger.info("Successfully migrated %i images", len(success))
287 logger.error("Failed to migrate %i images", len(failures))