import argparse
import time
import sys
import logging
import shutil
import tempfile
import os
import subprocess
import re

import arvados
import arvados.commands.keepdocker
from arvados._version import __version__
from arvados.collection import CollectionReader

logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)

_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'

class MigrationFailed(Exception):
    pass

def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create the
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) upgrade the Docker version, which converts the image to v2 format
    iv) save the v2 format image and upload it to Arvados
    v) create a migration link

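    Example (hypothetical cluster values):

      $ export ARVADOS_API_HOST=zzzzz.arvadosapi.com
      $ export ARVADOS_API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      $ arv-migrate-docker19 --dry-run   # report how many images still need migration
      $ arv-migrate-docker19             # perform the migration
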
    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')
    migrate19_parser.add_argument(
        '--verbose', action="store_true", help="Print stdout/stderr even on success")
    migrate19_parser.add_argument(
        '--force', action="store_true", help="Try to migrate even if there isn't enough space")

    migrate19_parser.add_argument(
        '--storage-driver', type=str, default="overlay",
        help="Docker storage driver, e.g. aufs, overlay, vfs")

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    only_migrate = None
    if args.infile:
        only_migrate = set()
        for l in args.infile:
            only_migrate.add(l.strip())

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
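    # Migration links are owned by the cluster's system user
    # (<cluster prefix>-tpzed-000000000000000); derive that UUID from the
    # prefix of the admin user's own UUID.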
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

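    # A dockerhash that is already a sha256 digest indicates a v2 format
    # image; everything else is an old v1 format image that may need migration.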
    old_images = []
    for uuid, img in images:
        if img["dockerhash"].startswith("sha256:"):
            continue
        old_images.append(img)

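    # Existing migration links record images that have already been migrated;
    # each link's tail_uuid is the portable data hash of the old image collection.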
    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    need_migrate = {}
    biggest = 0
    biggest_pdh = None
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                if c.values()[0].size() > biggest:
                    biggest = c.values()[0].size()
                    biggest_pdh = pdh

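    # Estimate scratch space needed for the biggest image.  The vfs driver
    # keeps a full copy of every layer, so budget roughly 20x the image size;
    # copy-on-write drivers such as overlay need a much smaller multiple.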
    if args.storage_driver == "vfs":
        will_need = (biggest*20)
    else:
        will_need = (biggest*2.5)

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest/(2**20))

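    # Check free space on the filesystem holding the temp directory;
    # `df -B1` reports sizes in bytes.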
    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
    ln = df_out.splitlines()[1]
    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups()
    if int(available) <= will_need:
        logger.warning("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)/(2**20), will_need/(2**20))
        if not args.force:
            sys.exit(1)
        else:
            logger.warning("--force provided, will migrate anyway")

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in need_migrate.values():
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        tarfile = oldcol.keys()[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], oldcol.values()[0].size()/(2**20))
        count += 1
        start = time.time()

        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
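            # Hand the Arvados credentials to the migration container through a
            # temporary env file (consumed by `docker run --env-file` below).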
            with tempfile.NamedTemporaryFile() as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
                if "ARVADOS_API_HOST_INSECURE" in os.environ:
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
                envfile.flush()

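                # The migration image runs its own Docker daemon (hence
                # --privileged) against a scratch /var/lib/docker and
                # arv-keepdocker cache; /root/migrate.sh downloads, loads,
                # upgrades and re-uploads the image.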
                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19:1.0",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
                             args.storage_driver]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

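                # migrate.sh prints free-space checkpoints at each stage; pull
                # them out of stdout for the disk space accounting messages below.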
                initial_space = re.search(r"Initial available space is (\d+)", out)
                imgload_space = re.search(r"Available space after image load is (\d+)", out)
                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)

                if initial_space:
                    isp = int(initial_space.group(1))
                    logger.debug("Available space initially: %i MiB", (isp)/(2**20))
                    if imgload_space:
                        sp = int(imgload_space.group(1))
                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
                    if imgupgrade_space:
                        sp = int(imgupgrade_space.group(1))
                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
                    if keepdocker_space:
                        sp = int(keepdocker_space.group(1))
                        logger.debug("Used after upload: %i MiB", (isp-sp)/(2**20))

                if cleanup_space:
                    sp = int(cleanup_space.group(1))
                    logger.info("Available after cleanup: %i MiB", (sp)/(2**20))

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

                if args.verbose:
                    logger.info("--- Stdout ---\n%s", out)
                    logger.info("--- Stderr ---\n%s", err)

            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

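                # Record the migration as a link from the old image collection's
                # portable data hash (tail) to the new one (head), owned by the
                # system user, so later runs skip this image.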
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))