from __future__ import print_function
from __future__ import division
import argparse
import time
import sys
import logging
import shutil
import tempfile
import os
import subprocess
import re

import arvados
import arvados.commands.keepdocker
from arvados._version import __version__
from arvados.collection import CollectionReader

logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)

_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'

class MigrationFailed(Exception):
    pass

def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create the
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag), it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) upgrade the Docker version, which migrates the image to the v2 format
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

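    Example session (the API host and token values below are placeholders):

        export ARVADOS_API_HOST=zzzzz.arvadosapi.com
        export ARVADOS_API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
        arv-migrate-docker19 --dry-run   # report how many images still need migration
        arv-migrate-docker19             # migrate them
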
    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

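    # If a list of portable data hashes was supplied, restrict migration to those images.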
    only_migrate = None
    if args.infile:
        only_migrate = set()
        for line in args.infile:
            only_migrate.add(line.strip())

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
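    # Migration links are owned by the system user: the admin's cluster prefix plus the all-zero identifier.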
    sys_uuid = user['uuid'][:12] + '000000000000000'

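    # List every Docker image stored in Arvados; hashes that do not start with "sha256:" identify old v1-format images.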
    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    is_new = lambda img: img['dockerhash'].startswith('sha256:')

    old_images = []
    for uuid, img in images:
        if is_new(img):
            continue
        old_images.append(img)

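    # Existing docker_image_migration links mark v1 images that already have a v2 counterpart;
    # tail_uuid is the old image's portable data hash.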
    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

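    # Fetch the collection records for all of the old images in a single query.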
    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

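    # Decide which images still need migration, and track the largest image tarball
    # to estimate how much temporary space will be needed.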
    need_migrate = {}
    biggest = 0
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                size = list(c.values())[0].size()
                if size > biggest:
                    biggest = size

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest>>20, biggest>>19)

    if args.dry_run:
        return

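    # Migrate the remaining images one at a time, each with its own scratch /var/lib/docker and keepdocker cache.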
    success = []
    failures = []
    count = 1
    for old_image in list(need_migrate.values()):
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        tarfile = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
        count += 1
        start = time.time()

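        # Per-image scratch directories, mounted into the container and removed in the finally block.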
        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
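            # Pass the Arvados credentials into the container via an env file
            # (opened in text mode so the writes also work on Python 3).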
            with tempfile.NamedTemporaryFile(mode="wt") as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
                if "ARVADOS_API_HOST_INSECURE" in os.environ:
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
                envfile.flush()

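                # Run the migration inside the arvados/migrate-docker19 container: migrate.sh downloads the
                # v1 image tarball, loads it into Docker, upgrades it to the v2 format, and uploads the result
                # back to Arvados. Output is captured as text so the new collection UUID can be parsed below.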
                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"]]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                        universal_newlines=True)
                out, err = proc.communicate()

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

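            # On success the container prints "Migrated uuid is <uuid>"; parse the new collection's UUID from its output.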
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

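                # Record the migration with a link from the old image's portable data hash to the new one,
                # so future runs skip this image.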
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))