1 from __future__ import print_function
2 from __future__ import division
14 import arvados.commands.keepdocker
15 from arvados._version import __version__
16 from arvados.collection import CollectionReader
# Logger for this command; DEBUG level is selected when the ARVADOS_DEBUG
# config setting is truthy.
18 logger = logging.getLogger('arvados.migrate-docker19')
19 logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
# Link class and link name that identify migration links: main() filters
# existing links on this pair and creates new links with the same pair.
22 _migration_link_class = 'docker_image_migration'
23 _migration_link_name = 'migrate_1.9_1.10'
# Raised by main() when the docker run migration subprocess exits with a
# non-zero return code. The exception handler in main() logs this type
# without a traceback (stdout and stderr were already logged at the raise).
25 class MigrationFailed(Exception):
28 def main(arguments=None):
29 """Docker image format migration tool for Arvados.
31 This converts Docker images stored in Arvados from image format v1
32 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
34 Requires Docker running on the local host.
38 1) Run arvados/docker/migrate-docker19/build.sh to create
39 arvados/migrate-docker19 Docker image.
41 2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
43 3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
45 This will query Arvados for v1 format Docker images. For each image that
46 does not already have a corresponding v2 format image (as indicated by a
47 docker_image_migration tag) it will perform the following process:
49 i) download the image from Arvados
50 ii) load it into Docker
51 iii) update the Docker version, which updates the image
52 iv) save the v2 format image and upload to Arvados
53 v) create a migration link
# Build the command-line parser; arguments=None makes argparse read sys.argv.
57 migrate19_parser = argparse.ArgumentParser()
58 migrate19_parser.add_argument(
59 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
60 help='Print version and exit.')
# Mutually exclusive group; presumably holds the --dry-run and
# --print-unmigrated options that follow (confirm).
62 exgroup = migrate19_parser.add_mutually_exclusive_group()
64 '--dry-run', action='store_true', help="Print number of pending migrations.")
66 '--print-unmigrated', action='store_true',
67 default=False, help="Print list of images needing migration.")
69 migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
# Optional positional file argument, opened for reading by argparse; per its
# help text it lists the images to be migrated.
71 migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
72 default=None, help="List of images to be migrated")
74 args = migrate19_parser.parse_args(arguments)
# Make the tempfile module create its files and directories under --tempdir.
77 tempfile.tempdir = args.tempdir
# Add one whitespace-stripped entry to only_migrate; entries are compared
# against portable data hashes when selecting images below.
# NOTE(review): presumably executed once per line of infile (confirm).
83 only_migrate.add(l.strip())
85 api_client = arvados.api()
# The token in use must belong to an admin user.
87 user = api_client.users().current().execute()
88 if not user['is_admin']:
89 raise Exception("This command requires an admin token")
# Owner uuid for the migration links created below: the first 12 characters
# of the current user uuid followed by fifteen zeros.
# NOTE(review): looks like the cluster system user uuid (confirm).
90 sys_uuid = user['uuid'][:12] + '000000000000000'
# Docker images known to Arvados, iterated below as (uuid, image) pairs.
# NOTE(review): the second argument 3 is presumably a retry count (confirm).
92 images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
# is_new: true when the image hash starts with sha256:
94 is_new = lambda img: img['dockerhash'].startswith('sha256:')
# Walk the image list and collect images into old_images.
# NOTE(review): images whose hash starts with sha256: are presumably skipped
# rather than appended (confirm).
98 for uuid, img in images:
99 if img["dockerhash"].startswith("sha256:"):
101 key = (img["repo"], img["tag"], img["timestamp"])
102 old_images.append(img)
# Fetch every existing migration link, matched on link class and link name.
104 migration_links = arvados.util.list_all(api_client.links().list, filters=[
105 ['link_class', '=', _migration_link_class],
106 ['name', '=', _migration_link_name],
# The tail_uuid of a migration link is the portable data hash of an old
# collection that has already been migrated (see the link creation below).
109 already_migrated = set()
110 for m in migration_links:
111 already_migrated.add(m["tail_uuid"])
# Load the collection records behind the old images and index them by uuid.
113 items = arvados.util.list_all(api_client.collections().list,
114 filters=[["uuid", "in", [img["collection"] for img in old_images]]],
115 select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
116 uuid_to_collection = {i["uuid"]: i for i in items}
# Select the images to migrate: the portable data hash has no migration link
# yet and, when only_migrate is not None, is listed in only_migrate.
120 for img in old_images:
121 i = uuid_to_collection[img["collection"]]
122 pdh = i["portable_data_hash"]
123 if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
124 need_migrate[pdh] = img
# Track the largest size seen, measured on the first entry of the collection.
125 with CollectionReader(i["manifest_text"]) as c:
126 if list(c.values())[0].size() > biggest:
127 biggest = list(c.values())[0].size()
# --print-unmigrated: iterate over the portable data hashes still pending.
# NOTE(review): presumably prints each one and then returns (confirm).
129 if args.print_unmigrated:
131 for pdh in need_migrate:
# Summary of the work found. biggest>>20 is the largest image size in MiB and
# biggest>>19 is double that, so the log asks for roughly twice the largest
# image size in free tempdir space.
135 logger.info("Already migrated %i images", len(already_migrated))
136 logger.info("Need to migrate %i images", len(need_migrate))
137 logger.info("Using tempdir %s", tempfile.gettempdir())
138 logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest>>20, biggest>>19)
# Migrate each pending image in turn.
146 for old_image in list(need_migrate.values()):
# Re-check against already_migrated, which grows inside this loop each time an
# image is migrated successfully.
# NOTE(review): a hit presumably skips this image (confirm).
147 if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
# The first key of the old collection is used as the image tar file name.
150 oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
151 tarfile = list(oldcol.keys())[0]
153 logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
154 old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
# Two scratch directories, mounted into the container below as
# /var/lib/docker and /root/.cache/arvados/docker.
158 varlibdocker = tempfile.mkdtemp()
159 dockercache = tempfile.mkdtemp()
# Hand the API settings from the environment to the container through a
# temporary env file (passed with --env-file below).
# NOTE(review): NamedTemporaryFile() opens in binary mode by default, so these
# str writes would raise TypeError on Python 3 (confirm target interpreter).
161 with tempfile.NamedTemporaryFile() as envfile:
162 envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
163 envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
164 if "ARVADOS_API_HOST_INSECURE" in os.environ:
165 envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
# Run the arvados/migrate-docker19 image. Its arguments include the old image
# as collection-uuid/tar-file-name and the owner uuid of the old collection.
168 dockercmd = ["docker", "run",
171 "--env-file", envfile.name,
172 "--volume", "%s:/var/lib/docker" % varlibdocker,
173 "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
174 "arvados/migrate-docker19",
176 "%s/%s" % (old_image["collection"], tarfile),
180 uuid_to_collection[old_image["collection"]]["owner_uuid"]]
# Run the container to completion, capturing stdout and stderr.
182 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
183 out, err = proc.communicate()
# Non-zero exit status: log both captured streams and raise MigrationFailed.
185 if proc.returncode != 0:
186 logger.error("Failed with return code %i", proc.returncode)
187 logger.error("--- Stdout ---\n%s", out)
188 logger.error("--- Stderr ---\n%s", err)
189 raise MigrationFailed()
# The uuid of the new collection is parsed out of the container stdout.
# NOTE(review): on Python 3 communicate() returns bytes here, and a str
# pattern applied to bytes raises TypeError (confirm target interpreter).
191 migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
193 newcol = CollectionReader(migrated.group(1))
# Record the migration as a link owned by sys_uuid: tail is the portable data
# hash of the old collection, head is the portable data hash of the new one.
195 api_client.links().create(body={"link": {
196 'owner_uuid': sys_uuid,
197 'link_class': _migration_link_class,
198 'name': _migration_link_name,
199 'tail_uuid': oldcol.portable_data_hash(),
200 'head_uuid': newcol.portable_data_hash()
201 }}).execute(num_retries=3)
203 logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
204 oldcol.portable_data_hash(), old_image["collection"],
205 newcol.portable_data_hash(), migrated.group(1),
# Remember the migrated hash so the re-check at the top of the loop sees it.
207 already_migrated.add(oldcol.portable_data_hash())
208 success.append(old_image["collection"])
# Failure bookkeeping for this image.
# NOTE(review): presumably the branch taken when the stdout pattern did not
# match (confirm).
210 logger.error("Error migrating '%s'", old_image["collection"])
211 failures.append(old_image["collection"])
# Log the failure with the elapsed time and record the collection. The
# traceback is included unless the exception is a MigrationFailed.
212 except Exception as e:
213 logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
214 exc_info=(not isinstance(e, MigrationFailed)))
215 failures.append(old_image["collection"])
# Remove the two scratch directories created for this image.
217 shutil.rmtree(varlibdocker)
218 shutil.rmtree(dockercache)
# Final tally of migrated and failed images.
220 logger.info("Successfully migrated %i images", len(success))
222 logger.error("Failed to migrate %i images", len(failures))