1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
16 import arvados.commands.keepdocker
17 from arvados._version import __version__
18 from arvados.collection import CollectionReader
# Module-level logger for this tool, named 'arvados.migrate-docker19'.
# DEBUG verbosity is selected when arvados.config has a truthy ARVADOS_DEBUG value.
21 logger = logging.getLogger('arvados.migrate-docker19')
22 logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
# link_class and name shared by every link that records a completed migration.
# main() filters on these two values to find images that were already migrated
# and writes them into each new link it creates.
25 _migration_link_class = 'docker_image_migration'
26 _migration_link_name = 'migrate_1.9_1.10'
# Raised by main() when the docker migration command exits with a non-zero
# return code. main() catches it and logs the failure without a traceback.
28 class MigrationFailed(Exception):
# Command-line entry point of the migration tool, flagged as deprecated through
# util._deprecated('3.0'). `arguments` is handed to argparse's parse_args();
# argparse falls back to sys.argv when it is None.
31 @util._deprecated('3.0')
32 def main(arguments=None):
33 """Docker image format migration tool for Arvados.
35 This converts Docker images stored in Arvados from image format v1
36 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
38 Requires Docker running on the local host.
42 1) Run arvados/docker/migrate-docker19/build.sh to create
43 arvados/migrate-docker19 Docker image.
45 2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
47 3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
49 This will query Arvados for v1 format Docker images. For each image that
50 does not already have a corresponding v2 format image (as indicated by a
51 docker_image_migration tag) it will perform the following process:
53 i) download the image from Arvados
54 ii) load it into Docker
55 iii) update the Docker version, which updates the image
56 iv) save the v2 format image and upload to Arvados
57 v) create a migration link
# --- Command-line interface -------------------------------------------------
61 migrate19_parser = argparse.ArgumentParser()
62 migrate19_parser.add_argument(
63 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
64 help='Print version and exit.')
65 migrate19_parser.add_argument(
66 '--verbose', action="store_true", help="Print stdout/stderr even on success")
67 migrate19_parser.add_argument(
68 '--force', action="store_true", help="Try to migrate even if there isn't enough space")
70 migrate19_parser.add_argument(
71 '--storage-driver', type=str, default="overlay",
72 help="Docker storage driver, e.g. aufs, overlay, vfs")
# Presumably --dry-run and --print-unmigrated are registered on this mutually
# exclusive group so they cannot be combined -- TODO confirm.
74 exgroup = migrate19_parser.add_mutually_exclusive_group()
76 '--dry-run', action='store_true', help="Print number of pending migrations.")
78 '--print-unmigrated', action='store_true',
79 default=False, help="Print list of images needing migration.")
81 migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
# Optional positional argument: a readable text file opened by argparse.
83 migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
84 default=None, help="List of images to be migrated")
86 args = migrate19_parser.parse_args(arguments)
# Make the tempfile module create its files and directories under --tempdir.
89 tempfile.tempdir = args.tempdir
# Switch this module's logger to DEBUG verbosity.
92 logger.setLevel(logging.DEBUG)
# Add a whitespace-stripped entry to the only_migrate filter
# (presumably one line of infile per entry -- TODO confirm).
98 only_migrate.add(l.strip())
# --- Discover which images still need migrating -----------------------------
100 api_client = arvados.api()
# The tool refuses to run unless the current token belongs to an admin user.
102 user = api_client.users().current().execute()
103 if not user['is_admin']:
104 raise Exception("This command requires an admin token")
# First 12 characters of the user's uuid padded with zeros; used further down
# as owner_uuid of the migration links.
105 sys_uuid = user['uuid'][:12] + '000000000000000'
# The second argument (3) is forwarded to list_images_in_arv
# (presumably a retry count -- TODO confirm).
107 images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
# NOTE(review): is_new is not referenced by any other visible line.
109 is_new = lambda img: img['dockerhash'].startswith('sha256:')
# A dockerhash starting with "sha256:" marks a new-format image (see is_new);
# presumably those are skipped so only v1 images reach old_images -- TODO confirm.
# NOTE(review): key is assigned but never read in the visible code.
113 for uuid, img in images:
114 if img["dockerhash"].startswith("sha256:"):
116 key = (img["repo"], img["tag"], img["timestamp"])
117 old_images.append(img)
# Fetch the links that record migrations performed earlier.
119 migration_links = arvados.util.keyset_list_all(
120 api_client.links().list,
122 ['link_class', '=', _migration_link_class],
123 ['name', '=', _migration_link_name],
# tail_uuid of a migration link holds the portable data hash of the old
# collection (that is what the link creation below stores there).
127 already_migrated = set()
128 for m in migration_links:
129 already_migrated.add(m["tail_uuid"])
# Load the collections that contain the old images and index them by uuid.
131 items = arvados.util.keyset_list_all(
132 api_client.collections().list,
133 filters=[["uuid", "in", [img["collection"] for img in old_images]]],
134 select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"],
136 uuid_to_collection = {i["uuid"]: i for i in items}
# Queue an image when its portable data hash has no migration link yet, is not
# already queued, and passes the optional only_migrate filter.
142 for img in old_images:
143 i = uuid_to_collection[img["collection"]]
144 pdh = i["portable_data_hash"]
145 if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
146 need_migrate[pdh] = img
# Size of the first file stored in the image's collection.
147 with CollectionReader(i["manifest_text"]) as c:
148 size = list(c.values())[0].size()
# Scratch space estimate derived from the biggest image: a factor of 20 for
# the "vfs" storage driver; the other visible assignment uses a factor of 2.5.
155 if args.storage_driver == "vfs":
156 will_need = (biggest*20)
158 will_need = (biggest*2.5)
# --print-unmigrated: walk the portable data hashes that still need migrating.
160 if args.print_unmigrated:
162 for pdh in need_migrate:
# Summary of the pending work; ">>20" converts a byte count to MiB.
166 logger.info("Already migrated %i images", len(already_migrated))
167 logger.info("Need to migrate %i images", len(need_migrate))
168 logger.info("Using tempdir %s", tempfile.gettempdir())
169 logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
170 logger.info("Total data to migrate about %i MiB", totalbytes>>20)
# --- Free space check -------------------------------------------------------
# Run df with 1-byte blocks on the temp directory and split the second output
# line (the first data row) into its six space-separated columns.
# NOTE(review): subprocess.check_output() returns bytes on Python 3 unless text
# mode is requested, and matching a str pattern against bytes raises
# TypeError -- confirm the Python version this is expected to run on.
172 df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
173 ln = df_out.splitlines()[1]
174 filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
# NOTE(review): Logger.warn is a deprecated alias of Logger.warning.
175 if int(available) <= will_need:
176 logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
180 logger.warn("--force provided, will migrate anyway")
# --- Migrate the queued images one at a time --------------------------------
# already_migrated grows while this loop runs, so each image is checked against
# it again before any work is done for it.
188 for old_image in list(need_migrate.values()):
189 if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
# The first file name in the old collection is used as the image tarball.
192 oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
193 tarfile = list(oldcol.keys())[0]
195 logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
196 old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
# Two scratch directories, mounted into the container below as /var/lib/docker
# and /root/.cache/arvados/docker, and removed again at the end of the iteration.
200 varlibdocker = tempfile.mkdtemp()
201 dockercache = tempfile.mkdtemp()
# The API host and token are passed to the container through a temporary file
# given to "docker run --env-file".
# NOTE(review): NamedTemporaryFile() opens in binary mode ('w+b') by default,
# so writing str values raises TypeError on Python 3 -- confirm.
203 with tempfile.NamedTemporaryFile() as envfile:
204 envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
205 envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
206 if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
207 envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
# Command line for the arvados/migrate-docker19:1.0 container. Its arguments
# include the "<collection uuid>/<tarball name>" path of the old image and the
# owner uuid of the old collection.
210 dockercmd = ["docker", "run",
213 "--env-file", envfile.name,
214 "--volume", "%s:/var/lib/docker" % varlibdocker,
215 "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
216 "arvados/migrate-docker19:1.0",
218 "%s/%s" % (old_image["collection"], tarfile),
222 uuid_to_collection[old_image["collection"]]["owner_uuid"],
# Run the container to completion, capturing stdout and stderr separately.
225 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
226 out, err = proc.communicate()
# Pick the disk space figures out of the container's stdout.
# NOTE(review): `out` is bytes on Python 3 (no text mode was requested on
# Popen), so these str patterns would raise TypeError -- confirm.
228 initial_space = re.search(r"Initial available space is (\d+)", out)
229 imgload_space = re.search(r"Available space after image load is (\d+)", out)
230 imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
231 keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
232 cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
# Report usage relative to the initial figure; dividing by 2**20 converts
# bytes to MiB.
235 isp = int(initial_space.group(1))
236 logger.info("Available space initially: %i MiB", (isp)/(2**20))
238 sp = int(imgload_space.group(1))
239 logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
241 sp = int(imgupgrade_space.group(1))
242 logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
244 sp = int(keepdocker_space.group(1))
245 logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
248 sp = int(cleanup_space.group(1))
249 logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
# A non-zero exit status aborts this image: the captured output is logged and
# MigrationFailed is raised for the handler further down.
251 if proc.returncode != 0:
252 logger.error("Failed with return code %i", proc.returncode)
253 logger.error("--- Stdout ---\n%s", out)
254 logger.error("--- Stderr ---\n%s", err)
255 raise MigrationFailed()
# Log the container's captured output at info level.
258 logger.info("--- Stdout ---\n%s", out)
259 logger.info("--- Stderr ---\n%s", err)
# The container reports the uuid of the newly uploaded collection on stdout.
261 migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
263 newcol = CollectionReader(migrated.group(1))
# Record the migration as a link from the old collection's portable data hash
# (tail_uuid) to the new one (head_uuid), owned by sys_uuid.
265 api_client.links().create(body={"link": {
266 'owner_uuid': sys_uuid,
267 'link_class': _migration_link_class,
268 'name': _migration_link_name,
269 'tail_uuid': oldcol.portable_data_hash(),
270 'head_uuid': newcol.portable_data_hash()
271 }}).execute(num_retries=3)
273 logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
274 oldcol.portable_data_hash(), old_image["collection"],
275 newcol.portable_data_hash(), migrated.group(1),
# Remember the old hash so a later image with the same hash is not migrated again.
277 already_migrated.add(oldcol.portable_data_hash())
278 success.append(old_image["collection"])
# Record the image's collection as a failure.
280 logger.error("Error migrating '%s'", old_image["collection"])
281 failures.append(old_image["collection"])
# Any exception counts the image as failed; the traceback is logged unless the
# exception is MigrationFailed, whose details were already logged above.
282 except Exception as e:
283 logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
284 exc_info=(not isinstance(e, MigrationFailed)))
285 failures.append(old_image["collection"])
# Remove the two scratch directories created for this image.
287 shutil.rmtree(varlibdocker)
288 shutil.rmtree(dockercache)
# --- Final summary ----------------------------------------------------------
290 logger.info("Successfully migrated %i images", len(success))
292 logger.error("Failed to migrate %i images", len(failures))