8567: Make sure /root/.cache/arvados/docker is tempdir volume mount as well.
[arvados.git] sdk/python/arvados/commands/migrate19.py
import argparse
import time
import sys
import logging
import shutil
import tempfile
import os
import subprocess
import re

import arvados
import arvados.commands.keepdocker
from arvados._version import __version__
from arvados.collection import CollectionReader

logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)

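# Completed migrations are recorded as Arvados links with this class and
# name, pointing from the old image collection to its migrated counterpart.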
_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'

class MigrationFailed(Exception):
    pass

def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create the
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN for the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not inside a container).

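    For example (the host and token below are placeholders for your own
    cluster's values):

      $ export ARVADOS_API_HOST=zzzzz.arvadosapi.com
      $ export ARVADOS_API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
      $ arv-migrate-docker19 --dry-run   # report how many images still need migration
      $ arv-migrate-docker19             # perform the migration
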
    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag), it will perform the following steps:

    i) download the image from Arvados
    ii) load it into Docker
    iii) upgrade Docker, which converts the image to the v2 format
    iv) save the v2 format image and upload it to Arvados
    v) create a migration link

    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    only_migrate = None
    if args.infile:
        only_migrate = set()
        for l in args.infile:
            only_migrate.add(l.strip())

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
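    # Derive the cluster's system user UUID (the user UUID prefix followed by
    # zeroes); the migration links created below are owned by the system user.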
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    is_new = lambda img: img['dockerhash'].startswith('sha256:')

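    # Images whose hash is a sha256: digest are already in the v2 format;
    # everything else is an old-format image and a migration candidate.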
    old_images = []
    for uuid, img in images:
        if is_new(img):
            continue
        old_images.append(img)

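    # Find migrations that have already been done: each one is a link whose
    # tail_uuid is the old image's portable data hash.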
    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

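    # Fetch the collection records for all old images so their manifests and
    # owners are available below.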
    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

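    # Work out which images still need migration (optionally restricted to
    # the list read from the input file) and track the largest image tarball
    # to estimate how much free space the tempdir needs.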
    need_migrate = {}
    biggest = 0
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                if c.values()[0].size() > biggest:
                    biggest = c.values()[0].size()

    if args.print_unmigrated:
        for pdh in need_migrate:
            print pdh
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest/(2**20), (biggest*2)/(2**20))

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in need_migrate.values():
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        tarfile = oldcol.keys()[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], oldcol.values()[0].size()/(2**20))
        count += 1
        start = time.time()

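        # Scratch directories mounted into the migration container: one
        # becomes its /var/lib/docker, the other its /root/.cache/arvados/docker
        # image cache.  Both are removed in the finally block below.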
        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
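            # Pass the Arvados credentials to the container through a
            # temporary --env-file rather than on the command line.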
            with tempfile.NamedTemporaryFile() as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
                if "ARVADOS_API_HOST_INSECURE" in os.environ:
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
                envfile.flush()

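                # Run the privileged migration container with the scratch
                # /var/lib/docker and the image cache directory mounted from
                # the host, and let /root/migrate.sh do the conversion and
                # upload the v2 image to Arvados.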
                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"]]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

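            # migrate.sh prints the UUID of the newly uploaded collection;
            # pull it out of the captured stdout.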
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

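                # Record the migration as a link from the old collection's
                # portable data hash to the new one's, owned by the system user.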
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))