8567: Use vfs Docker graph driver for maximum compatibility. --dry-run reports
[arvados.git] / sdk / python / arvados / commands / migrate19.py
1 import argparse
2 import time
3 import sys
4 import logging
5 import shutil
6 import tempfile
7 import os
8 import subprocess
9 import re
10
11 import arvados
12 import arvados.commands.keepdocker
13 from arvados._version import __version__
14 from arvados.collection import CollectionReader
15
# Module logger; verbosity follows the ARVADOS_DEBUG configuration setting.
logger = logging.getLogger('arvados.migrate-docker19')
if arvados.config.get('ARVADOS_DEBUG'):
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)

# link_class / name of the Arvados link that records a finished migration
# (tail_uuid = old image PDH, head_uuid = new image PDH; see main()).
_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'
22
class MigrationFailed(Exception):
    """Raised when the migration container exits with a non-zero status."""
25
def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    Args:
        arguments: Command-line arguments passed to argparse.  When None,
            argparse falls back to sys.argv[1:].

    Returns:
        None.  Per-image failures are logged and counted, not raised.

    Raises:
        Exception: if the current API token does not belong to an admin user.
    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    # Setting tempfile.tempdir redirects every later mkdtemp() /
    # NamedTemporaryFile() call in this process.
    if args.tempdir:
        tempfile.tempdir = args.tempdir

    # Optional whitelist of portable data hashes, one per input line.
    only_migrate = None
    if args.infile:
        only_migrate = {line.strip() for line in args.infile}

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    # Migration links are owned by the uuid built from the first 12
    # characters of the current user's uuid followed by fifteen zeros.
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    # Images whose hash starts with "sha256:" are skipped: only the
    # remaining ones are candidates for migration.
    old_images = [img for _, img in images
                  if not img["dockerhash"].startswith("sha256:")]

    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    # tail_uuid of a migration link is the PDH of the old-format image.
    already_migrated = {m["tail_uuid"] for m in migration_links}

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    # Map PDH -> image record for everything still to be migrated, and track
    # the size of the largest image so the user can check free disk space.
    need_migrate = {}
    biggest = 0
    for img in old_images:
        col = uuid_to_collection[img["collection"]]
        pdh = col["portable_data_hash"]
        if pdh in already_migrated:
            continue
        if only_migrate is not None and pdh not in only_migrate:
            continue
        need_migrate[pdh] = img
        with CollectionReader(col["manifest_text"]) as c:
            size = list(c.values())[0].size()
        if size > biggest:
            biggest = size

    if args.print_unmigrated:
        for pdh in need_migrate:
            # Parenthesised form prints the same text on Python 2 and parses
            # on Python 3.
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest/(2**20), (biggest*2)/(2**20))

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in need_migrate.values():
        source_col = uuid_to_collection[old_image["collection"]]
        # already_migrated grows during this loop, so re-check before
        # spending time on an image.
        if source_col["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(source_col["manifest_text"])
        tarfile = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()/(2**20))
        count += 1
        start = time.time()

        # Scratch directory mounted as the container's /var/lib/docker.
        varlibdocker = tempfile.mkdtemp()
        try:
            # API credentials are handed to the container through an env
            # file that exists only while the container runs.
            with tempfile.NamedTemporaryFile() as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
                if "ARVADOS_API_HOST_INSECURE" in os.environ:
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
                envfile.flush()

                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "arvados/migrate-docker19",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             source_col["owner_uuid"]]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

            # The uuid of the new collection is read from the container's
            # stdout.
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

                # Record old PDH -> new PDH so later runs skip this image.
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            # One failed image must not stop the remaining migrations.  The
            # traceback is omitted for MigrationFailed because the container
            # output has already been logged.
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))