e75095bf6c94956b316cd07faca4f1be278d617d
[arvados.git] / sdk / python / arvados / commands / migrate19.py
1 import argparse
2 import time
3 import sys
4 import logging
5 import shutil
6 import tempfile
7 import os
8 import subprocess
9 import re
10
11 import arvados
12 import arvados.commands.keepdocker
13 from arvados._version import __version__
14 from arvados.collection import CollectionReader
15
# Logger used by every step of the migration tool.  It is switched to DEBUG
# when the ARVADOS_DEBUG setting is truthy, and stays at INFO otherwise.
logger = logging.getLogger('arvados.migrate-docker19')
if arvados.config.get('ARVADOS_DEBUG'):
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)

# link_class / name of the Arvados link that records a finished migration:
# main() lists links with these values to find images that are already
# migrated, and creates one (tail_uuid = old portable data hash,
# head_uuid = new portable data hash) after each successful migration.
_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'
22
class MigrationFailed(Exception):
    """Raised when the migration container exits with a non-zero status.

    main() catches it per image and logs the failure without a traceback,
    because the container's stdout/stderr have already been logged.
    """
25
def _run_migrate_container(old_image, oldcol, image_tar, varlibdocker):
    """Run the arvados/migrate-docker19 container for a single image.

    Arguments:
      old_image: image record; its "collection", "repo" and "tag" entries
        are passed to the migration script.
      oldcol: CollectionReader for the old image's collection; used to look
        up the owner_uuid handed to the migration script.
      image_tar: name of the image tarball inside the collection.
      varlibdocker: host directory mounted as /var/lib/docker in the
        container.

    Returns the container's captured stdout.

    Raises:
      KeyError: ARVADOS_API_HOST or ARVADOS_API_TOKEN is not configured.
      MigrationFailed: the container exited with a non-zero status (stdout
        and stderr are logged before raising).
    """
    # Look the settings up through arvados.config (as the module already
    # does for ARVADOS_DEBUG) instead of indexing os.environ directly.
    # A missing mandatory setting still surfaces as KeyError.
    env_lines = []
    for name in ("ARVADOS_API_HOST", "ARVADOS_API_TOKEN"):
        value = arvados.config.get(name)
        if value is None:
            raise KeyError(name)
        env_lines.append("%s=%s\n" % (name, value))
    insecure = arvados.config.get("ARVADOS_API_HOST_INSECURE")
    if insecure is not None:
        env_lines.append("ARVADOS_API_HOST_INSECURE=%s\n" % insecure)

    # The env file must exist for as long as "docker run" is executing, so
    # the subprocess is started and awaited inside the "with" block.
    with tempfile.NamedTemporaryFile() as envfile:
        envfile.write("".join(env_lines))
        envfile.flush()

        dockercmd = ["docker", "run",
                     "--privileged",
                     "--rm",
                     "--env-file", envfile.name,
                     "--volume", "%s:/var/lib/docker" % varlibdocker,
                     "arvados/migrate-docker19",
                     "/root/migrate.sh",
                     "%s/%s" % (old_image["collection"], image_tar),
                     image_tar[0:40],
                     old_image["repo"],
                     old_image["tag"],
                     oldcol.api_response()["owner_uuid"]]

        proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()

    if proc.returncode != 0:
        logger.error("Failed with return code %i", proc.returncode)
        logger.error("--- Stdout ---\n%s", out)
        logger.error("--- Stderr ---\n%s", err)
        raise MigrationFailed()

    return out


def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    Arguments:
      arguments: list of command line arguments; None lets argparse read
        sys.argv.  Recognised options are --version, --dry-run,
        --print-unmigrated and an optional file listing the portable data
        hashes (one per line) that migration should be restricted to.

    Returns None.  Per-image failures are logged and counted, they do not
    stop the run.

    Raises:
      Exception: the current API token does not belong to an admin user.
    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    # None means "no restriction"; otherwise only the listed portable data
    # hashes are considered for migration.
    only_migrate = None
    if args.infile:
        only_migrate = {line.strip() for line in args.infile}

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    # Migration links are owned by this uuid: the first 12 characters of the
    # current user's uuid followed by fifteen zeros.
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    # Images whose Docker hash starts with "sha256:" are already in the new
    # format; everything else is a migration candidate.
    old_images = [img for _, img in images
                  if not img["dockerhash"].startswith("sha256:")]

    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    # tail_uuid of a migration link holds the old image's portable data hash.
    already_migrated = {link["tail_uuid"] for link in migration_links}

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash"])
    uuid_to_pdh = {i["uuid"]: i["portable_data_hash"] for i in items}

    # Keyed by portable data hash so identical content stored under several
    # collection uuids is migrated only once.
    need_migrate = {}
    for img in old_images:
        pdh = uuid_to_pdh[img["collection"]]
        if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for pdh, old_image in need_migrate.items():
        # already_migrated grows while the loop runs, so re-check here.
        if pdh in already_migrated:
            continue

        logger.info("[%i/%i] Migrating %s:%s (%s)", count, len(need_migrate), old_image["repo"], old_image["tag"], old_image["collection"])
        count += 1
        start = time.time()

        varlibdocker = tempfile.mkdtemp()
        try:
            # Reading the collection happens inside the try block so that an
            # unreadable or empty collection is recorded as a failure for
            # this image instead of aborting the remaining migrations.
            oldcol = CollectionReader(old_image["collection"])
            image_tar = oldcol.keys()[0]

            out = _run_migrate_container(old_image, oldcol, image_tar, varlibdocker)

            # The migration script reports the new collection on stdout.
            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            # MigrationFailed has already logged the container output, so a
            # traceback is only attached for unexpected exceptions.
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))