[arvados.git] / sdk / python / arvados / commands / keepdocker.py
#!/usr/bin/env python
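"""arv-keepdocker: upload or list Docker images in Arvados.

Saves a local Docker image as a tar archive, stores it in Keep with
arv-put, and records docker_image_hash and docker_image_repo+tag links
so the image can be found and reused later.  When no image argument is
given, lists the Docker images already stored in Arvados.
"""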

import argparse
import datetime
import errno
import json
import os
import subprocess
import sys
import tarfile
import tempfile
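# Imported for its side effect: loading _strptime up front is a common
# workaround for the lazy-import race that datetime.strptime can hit
# when it is first called from a worker thread.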
import _strptime

from collections import namedtuple
from stat import ST_MTIME, ST_SIZE

import arvados
import arvados.commands._util as arv_cmd
import arvados.commands.put as arv_put

STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

DockerImage = namedtuple('DockerImage',
                         ['repo', 'tag', 'hash', 'created', 'vsize'])

keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")

keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
        description="Upload or list Docker images in Arvados",
        parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])

class DockerError(Exception):
    pass


def popen_docker(cmd, *args, **kwargs):
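    """Run a Docker subcommand and return its subprocess.Popen object.

    Tries the `docker.io` executable first and falls back to `docker` if
    it is not on $PATH.  Unless the caller supplies its own stdin, the
    child's stdin pipe is closed right away.
    """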
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        docker_proc.stdin.close()
    return docker_proc

def check_docker(proc, description):
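    """Wait for a Docker subprocess; raise DockerError if it exited nonzero."""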
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))

def docker_images():
    # Yield a DockerImage tuple for each installed image.
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")

def find_image_hashes(image_search, image_tag=None):
    # Given one argument, search for Docker images with matching hashes,
    # and return their full hashes in a set.
    # Given two arguments, also search for a Docker image with the
    # same repository and tag.  If one is found, return its hash in a
    # set; otherwise, fall back to the one-argument hash search.
    # Returns an empty set if no match is found; an ambiguous hash
    # search returns every matching hash.
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        if (image.repo == image_search) and (image.tag == image_tag):
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches

def find_one_image_hash(image_search, image_tag=None):
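    """Return the hash of the one Docker image matching the search.

    Raises DockerError if no image matches, or if the search matches
    more than one image.
    """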
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))

def stat_cache_name(image_file):
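    """Return the path of the stat cache corresponding to an image file."""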
    return getattr(image_file, 'name', image_file) + '.stat'

def pull_image(image_name, image_tag):
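    """Pull the given image and tag from the Docker registry."""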
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")

def save_image(image_hash, image_file):
    # Save the specified Docker image to image_file, then try to save its
    # stats so we can try to resume after interruption.
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    image_file.flush()
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.

def prep_image_file(filename):
    # Return a file object ready to save a Docker image,
    # and a boolean indicating whether or not we need to actually save the
    # image (False if a cached save is available).
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save

def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
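    """Create an Arvados link of the given class and name, with retries."""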
    link_attrs.update({'link_class': link_class, 'name': link_name})
    return api_client.links().create(body=link_attrs).execute(
        num_retries=num_retries)

def ptimestamp(t):
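    """Parse an ISO 8601 UTC timestamp, discarding any fractional seconds."""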
    s = t.split(".")
    if len(s) == 2:
        t = s[0] + s[1][-1:]
    return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")

def list_images_in_arv(api_client, num_retries):
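    """Print a table of the Docker images already stored in Arvados.

    Images are identified by their docker_image_hash and
    docker_image_repo+tag links, and are listed newest first.
    """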
    existing_links = api_client.links().list(
        filters=[['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']]]
        ).execute(num_retries=num_retries)['items']
    images = {}
    for link in existing_links:
        collection_uuid = link["head_uuid"]
        if collection_uuid not in images:
            images[collection_uuid] = {"dockerhash": "<none>",
                                       "repo": "<none>",
                                       "tag": "<none>",
                                       "timestamp": ptimestamp("1970-01-01T00:00:01Z")}

        if link["link_class"] == "docker_image_hash":
            images[collection_uuid]["dockerhash"] = link["name"]

        if link["link_class"] == "docker_image_repo+tag":
            r = link["name"].split(":")
            images[collection_uuid]["repo"] = r[0]
            if len(r) > 1:
                images[collection_uuid]["tag"] = r[1]

        if "image_timestamp" in link["properties"]:
            images[collection_uuid]["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
        else:
            images[collection_uuid]["timestamp"] = ptimestamp(link["created_at"])

    st = sorted(images.items(), key=lambda i: i[1]["timestamp"], reverse=True)

    fmt = "{:30}  {:10}  {:12}  {:29}  {:20}"
    print(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
    for i, j in st:
        print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))

def main(arguments=None):
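    """Upload the requested Docker image to Arvados, or list stored images."""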
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        list_images_in_arv(api, args.retries)
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        print >>sys.stderr, "arv-keepdocker:", error.message
        sys.exit(1)

    image_repo_tag = ('{}:{}'.format(args.image, args.tag)
                      if not image_hash.startswith(args.image.lower()) else None)

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                existing_repo_tag = (api.links().list(
                    filters=[['link_class', '=', 'docker_image_repo+tag'],
                             ['name', '=', image_repo_tag],
                             ['head_uuid', 'in', [c['uuid'] for c in collections]]]
                    ).execute(num_retries=args.retries)['items']) if image_repo_tag else []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                if owned_col:
                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                else:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid}

                if not owned_img:
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                print(coll_uuid)

                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            if error.errno != errno.ENOENT:
                raise

if __name__ == '__main__':
    main()