# Merge branch '8567-moar-docker' refs #8567
# [arvados.git] / sdk / python / arvados / commands / migrate19.py
1 import argparse
2 import time
3 import sys
4 import logging
5 import shutil
6 import tempfile
7 import os
8 import subprocess
9 import re
10
11 import arvados
12 import arvados.commands.keepdocker
13 from arvados._version import __version__
14 from arvados.collection import CollectionReader
15
# Module-level logger; verbosity is raised to DEBUG when ARVADOS_DEBUG is
# set in the Arvados configuration/environment (see also --verbose below).
logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)

# Link class/name used to record that a v1 image has been migrated: the
# link's tail_uuid is the old image's portable data hash and its head_uuid
# is the migrated (v2) image's portable data hash.
_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'
22
class MigrationFailed(Exception):
    """Raised when the per-image migration container exits unsuccessfully."""
25
def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')
    migrate19_parser.add_argument(
        '--verbose', action="store_true", help="Print stdout/stderr even on success")
    migrate19_parser.add_argument(
        '--force', action="store_true", help="Try to migrate even if there isn't enough space")

    migrate19_parser.add_argument(
        '--storage-driver', type=str, default="overlay",
        help="Docker storage driver, e.g. aufs, overlay, vfs")

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Optional whitelist of portable data hashes, one per line in infile.
    only_migrate = None
    if args.infile:
        only_migrate = set(line.strip() for line in args.infile)

    api_client = arvados.api()

    # Migration links are owned by the system user, so an admin token is
    # required to create them.
    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    # System user uuid: cluster prefix (first 12 chars) + fixed zero tail.
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    # v2-format images are identified by a "sha256:" content hash; every
    # other image is v1 format and a migration candidate.
    old_images = [img for _, img in images
                  if not img["dockerhash"].startswith("sha256:")]

    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    # tail_uuid of a migration link is the portable data hash of a v1 image
    # that has already been migrated.
    already_migrated = set(m["tail_uuid"] for m in migration_links)

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    # Build pdh -> image record for everything still to migrate, tracking
    # total and largest image sizes for the disk-space check below.
    need_migrate = {}
    totalbytes = 0
    biggest = 0
    biggest_pdh = None
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                # list(...) so this also works on Python 3, where dict views
                # cannot be indexed.  The collection holds one image tar.
                size = list(c.values())[0].size()
                if size > biggest:
                    biggest = size
                    biggest_pdh = pdh
                totalbytes += size

    # Rough worst-case scratch space needed for the biggest image: the vfs
    # driver copies every layer, other drivers need a smaller multiple.
    if args.storage_driver == "vfs":
        will_need = (biggest*20)
    else:
        will_need = (biggest*2.5)

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest/(2**20))
    logger.info("Total data to migrate about %i MiB", totalbytes/(2**20))

    # Check free space on the temp filesystem by parsing `df -B1` output
    # (second line: filesystem, blocks, used, available, use%, mount point).
    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()],
                                     universal_newlines=True)
    ln = df_out.splitlines()[1]
    filesystem, blocks, used, available, use_pct, mounted = re.match(
        r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups()
    if int(available) <= will_need:
        logger.warning("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)/(2**20), will_need/(2**20))
        if not args.force:
            sys.exit(1)
        else:
            logger.warning("--force provided, will migrate anyway")

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in need_migrate.values():
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        # The image collection contains a single docker image tar file.
        image_tar = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"],
                    list(oldcol.values())[0].size()/(2**20))
        count += 1
        start = time.time()

        # Scratch directories bind-mounted into the migration container.
        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
            # mode="w" so writing str works on both Python 2 and 3.
            with tempfile.NamedTemporaryFile(mode="w") as envfile:
                # Pass cluster credentials into the container via --env-file.
                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
                envfile.flush()

                # Privileged container runs docker-in-docker: /root/migrate.sh
                # loads the v1 image, upgrades it, and uploads the v2 result.
                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19:1.0",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], image_tar),
                             image_tar[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
                             args.storage_driver]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        universal_newlines=True)
                out, err = proc.communicate()

                # migrate.sh reports free space at each stage; log the deltas
                # so space problems inside the container are diagnosable.
                initial_space = re.search(r"Initial available space is (\d+)", out)
                imgload_space = re.search(r"Available space after image load is (\d+)", out)
                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)

                if initial_space:
                    isp = int(initial_space.group(1))
                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
                    if imgload_space:
                        sp = int(imgload_space.group(1))
                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
                    if imgupgrade_space:
                        sp = int(imgupgrade_space.group(1))
                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
                    if keepdocker_space:
                        sp = int(keepdocker_space.group(1))
                        logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))

                if cleanup_space:
                    sp = int(cleanup_space.group(1))
                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

                if args.verbose:
                    logger.info("--- Stdout ---\n%s", out)
                    logger.info("--- Stderr ---\n%s", err)

            # migrate.sh prints the uuid of the new (v2) image collection.
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

                # Record the migration so this image is skipped next time.
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            # Log a traceback for unexpected errors; MigrationFailed already
            # logged the container output above.
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))