12 import arvados.commands.keepdocker
13 from arvados._version import __version__
14 from arvados.collection import CollectionReader
# Module-level logger for the migration tool; raised to DEBUG when the
# ARVADOS_DEBUG config setting is present.
# NOTE(review): the conditional expression below is truncated in this excerpt —
# the else branch (presumably logging.INFO) is not visible here.
16 logger = logging.getLogger('arvados.migrate-docker19')
17 logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
# Link class/name used on API-server link records that mark a v1-format image
# collection as already migrated to v2 (tail = old image, head = new image).
20 _migration_link_class = 'docker_image_migration'
21 _migration_link_name = 'migrate_1.9_1.10'
# Raised when the containerized migration subprocess exits nonzero (see the
# proc.returncode check inside main()).  Class body is elided from this excerpt.
23 class MigrationFailed(Exception):
# NOTE(review): this excerpt is a numbered listing with many source lines
# elided (the embedded line numbers jump).  Comments below describe only the
# visible code; loop headers, try/except framing, and several branches are
# missing between the visible lines and should be confirmed against the full
# source before acting on any comment here.
26 def main(arguments=None):
27 """Docker image format migration tool for Arvados.
29 This converts Docker images stored in Arvados from image format v1
30 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
32 Requires Docker running on the local host.
36 1) Run arvados/docker/migrate-docker19/build.sh to create
37 arvados/migrate-docker19 Docker image.
39 2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
41 3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
43 This will query Arvados for v1 format Docker images. For each image that
44 does not already have a corresponding v2 format image (as indicated by a
45 docker_image_migration tag) it will perform the following process:
47 i) download the image from Arvados
48 ii) load it into Docker
49 iii) update the Docker version, which updates the image
50 iv) save the v2 format image and upload to Arvados
51 v) create a migration link
# --- Command-line interface -------------------------------------------------
55 migrate19_parser = argparse.ArgumentParser()
56 migrate19_parser.add_argument(
57 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
58 help='Print version and exit.')
59 migrate19_parser.add_argument(
60 '--verbose', action="store_true", help="Print stdout/stderr even on success")
61 migrate19_parser.add_argument(
62 '--force', action="store_true", help="Try to migrate even if there isn't enough space")
64 migrate19_parser.add_argument(
65 '--storage-driver', type=str, default="overlay",
66 help="Docker storage driver, e.g. aufs, overlay, vfs")
# --dry-run and --print-unmigrated live in a mutually exclusive group; the
# exgroup.add_argument( call lines themselves are elided from this excerpt.
68 exgroup = migrate19_parser.add_mutually_exclusive_group()
70 '--dry-run', action='store_true', help="Print number of pending migrations.")
72 '--print-unmigrated', action='store_true',
73 default=False, help="Print list of images needing migration.")
75 migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
77 migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
78 default=None, help="List of images to be migrated")
80 args = migrate19_parser.parse_args(arguments)
# Honor --tempdir for every tempfile.* call below (mkdtemp, NamedTemporaryFile).
83 tempfile.tempdir = args.tempdir
86 logger.setLevel(logging.DEBUG)
# If an infile was given, build the whitelist of images to migrate, one entry
# per line.  NOTE(review): the enclosing if/for lines are elided here.
92 only_migrate.add(l.strip())
# --- Gather state from the API server ---------------------------------------
94 api_client = arvados.api()
# Admin is required: the tool creates links owned by the system user below.
96 user = api_client.users().current().execute()
97 if not user['is_admin']:
98 raise Exception("This command requires an admin token")
# System user uuid: cluster prefix of the current user's uuid padded with zeros.
99 sys_uuid = user['uuid'][:12] + '000000000000000'
# All Docker images known to this cluster (second arg is num_retries).
101 images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
# v2-format images are identified by a sha256: content-addressed dockerhash.
103 is_new = lambda img: img['dockerhash'].startswith('sha256:')
# Partition images: sha256-hashed ones are already v2; the rest are v1
# candidates.  NOTE(review): the branch bodies between these lines are elided.
107 for uuid, img in images:
108 if img["dockerhash"].startswith("sha256:"):
110 key = (img["repo"], img["tag"], img["timestamp"])
111 old_images.append(img)
# Existing migration links record images already converted (tail_uuid is the
# portable data hash of the v1 image collection).
113 migration_links = arvados.util.list_all(api_client.links().list, filters=[
114 ['link_class', '=', _migration_link_class],
115 ['name', '=', _migration_link_name],
118 already_migrated = set()
119 for m in migration_links:
120 already_migrated.add(m["tail_uuid"])
# Fetch the collections that back the old-format images.
122 items = arvados.util.list_all(api_client.collections().list,
123 filters=[["uuid", "in", [img["collection"] for img in old_images]]],
124 select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
125 uuid_to_collection = {i["uuid"]: i for i in items}
# Decide which images still need migrating (deduplicated by portable data
# hash, filtered by the optional --infile whitelist) and total up sizes so we
# can sanity-check available temp space.
131 for img in old_images:
132 i = uuid_to_collection[img["collection"]]
133 pdh = i["portable_data_hash"]
134 if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
135 need_migrate[pdh] = img
136 with CollectionReader(i["manifest_text"]) as c:
# c.values()[0] is the single image tarball in the collection — presumably the
# saved docker image; its size drives the space estimate.  TODO confirm.
137 if c.values()[0].size() > biggest:
138 biggest = c.values()[0].size()
140 totalbytes += c.values()[0].size()
# Scratch-space estimate: vfs needs far more (20x biggest image) than the
# copy-on-write drivers (2.5x).  NOTE(review): the else line is elided here.
143 if args.storage_driver == "vfs":
144 will_need = (biggest*20)
146 will_need = (biggest*2.5)
# --print-unmigrated: list pending portable data hashes and stop.
148 if args.print_unmigrated:
150 for pdh in need_migrate:
154 logger.info("Already migrated %i images", len(already_migrated))
155 logger.info("Need to migrate %i images", len(need_migrate))
156 logger.info("Using tempdir %s", tempfile.gettempdir())
157 logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest/(2**20))
158 logger.info("Total data to migrate about %i MiB", totalbytes/(2**20))
# Check free space on the temp filesystem; -B1 makes df report plain bytes.
160 df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
161 ln = df_out.splitlines()[1]
# NOTE(review): .groups(1) passes 1 as the *default* argument of groups(), not
# a group index — harmless since every group matches, but .groups() was likely
# intended.  Also assumes Python 2 str output from check_output; under
# Python 3 df_out would be bytes — TODO confirm target Python version.
162 filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
163 if int(available) <= will_need:
# NOTE(review): logger.warn is a deprecated alias for logger.warning.
164 logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)/(2**20), will_need/(2**20))
# With --force we proceed anyway; the abort path for the non-force case is
# elided from this excerpt.
168 logger.warn("--force provided, will migrate anyway")
# --- Migration loop: one containerized migration per pending image ----------
176 for old_image in need_migrate.values():
# Skip anything already migrated (including earlier in this same run).
177 if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
180 oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
# First (only) file in the collection is the saved image tarball.
181 tarfile = oldcol.keys()[0]
183 logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
184 old_image["tag"], old_image["collection"], oldcol.values()[0].size()/(2**20))
# Scratch dirs bind-mounted into the migration container; removed at the end
# of each iteration (see shutil.rmtree below).
188 varlibdocker = tempfile.mkdtemp()
189 dockercache = tempfile.mkdtemp()
# Pass Arvados credentials into the container via docker's --env-file so the
# token never appears on the command line.
191 with tempfile.NamedTemporaryFile() as envfile:
192 envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
193 envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
194 if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
195 envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
# Run the dedicated migration image; several docker-run option lines and the
# positional arguments between these lines are elided from this excerpt.
198 dockercmd = ["docker", "run",
201 "--env-file", envfile.name,
202 "--volume", "%s:/var/lib/docker" % varlibdocker,
203 "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
204 "arvados/migrate-docker19:1.0",
206 "%s/%s" % (old_image["collection"], tarfile),
210 uuid_to_collection[old_image["collection"]]["owner_uuid"],
213 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
214 out, err = proc.communicate()
# The script inside the container reports free space at each stage on stdout;
# scrape those figures purely for progress/diagnostic logging.
216 initial_space = re.search(r"Initial available space is (\d+)", out)
217 imgload_space = re.search(r"Available space after image load is (\d+)", out)
218 imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
219 keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
220 cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
# NOTE(review): the guards (e.g. "if initial_space:") around these group()
# calls are elided here; each int(...group(1)) would raise if the pattern
# did not match.
223 isp = int(initial_space.group(1))
224 logger.info("Available space initially: %i MiB", (isp)/(2**20))
226 sp = int(imgload_space.group(1))
227 logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
229 sp = int(imgupgrade_space.group(1))
230 logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
232 sp = int(keepdocker_space.group(1))
233 logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
236 sp = int(cleanup_space.group(1))
237 logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
# Nonzero exit from the container aborts this image's migration; the raise is
# presumably caught by the except Exception handler below.
239 if proc.returncode != 0:
240 logger.error("Failed with return code %i", proc.returncode)
241 logger.error("--- Stdout ---\n%s", out)
242 logger.error("--- Stderr ---\n%s", err)
243 raise MigrationFailed()
246 logger.info("--- Stdout ---\n%s", out)
247 logger.info("--- Stderr ---\n%s", err)
# The container prints the uuid of the newly uploaded v2 image collection.
249 migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
251 newcol = CollectionReader(migrated.group(1))
# Record the migration as a link owned by the system user:
# tail = v1 image pdh, head = v2 image pdh.
253 api_client.links().create(body={"link": {
254 'owner_uuid': sys_uuid,
255 'link_class': _migration_link_class,
256 'name': _migration_link_name,
257 'tail_uuid': oldcol.portable_data_hash(),
258 'head_uuid': newcol.portable_data_hash()
259 }}).execute(num_retries=3)
261 logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
262 oldcol.portable_data_hash(), old_image["collection"],
263 newcol.portable_data_hash(), migrated.group(1),
# Mark as done so duplicate collections with the same pdh are skipped later
# in this run.
265 already_migrated.add(oldcol.portable_data_hash())
266 success.append(old_image["collection"])
268 logger.error("Error migrating '%s'", old_image["collection"])
269 failures.append(old_image["collection"])
# Full traceback only for unexpected errors; MigrationFailed was already
# logged in detail above.
270 except Exception as e:
271 logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
272 exc_info=(not isinstance(e, MigrationFailed)))
273 failures.append(old_image["collection"])
# Always reclaim the scratch dirs, success or failure (presumably in a
# finally: block — the header line is elided here).
275 shutil.rmtree(varlibdocker)
276 shutil.rmtree(dockercache)
# --- Summary -----------------------------------------------------------------
278 logger.info("Successfully migrated %i images", len(success))
280 logger.error("Failed to migrate %i images", len(failures))