Merge branch 'master' into 3661-copy-move-from-show
[arvados.git] / sdk / python / arvados / commands / keepdocker.py
1 #!/usr/bin/env python
2
3 import argparse
4 import datetime
5 import errno
6 import json
7 import os
8 import subprocess
9 import sys
10 import tarfile
11 import tempfile
12
13 from collections import namedtuple
14 from stat import *
15
16 import arvados
17 import arvados.commands._util as arv_cmd
18 import arvados.commands.put as arv_put
19
# Exception types caught around the ".stat" cache-file handling in
# save_image() and prep_image_file(); when one is raised there, the code
# carries on without using the cache.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One parsed row of `docker images` output, as yielded by docker_images().
DockerImage = namedtuple('DockerImage',
                         ['repo', 'tag', 'hash', 'created', 'vsize'])

# Options that belong to arv-keepdocker itself.  Built with add_help=False
# so it can be used as a parent of arg_parser below; main() also uses it
# on its own to separate these options from the ones passed on to arv-put.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

# --pull and --no-pull both write to args.pull and cannot be combined.
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")

# Both positionals are optional (nargs='?'); main() lists the images
# known to Arvados when no image is given.
keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
        description="Upload or list Docker images in Arvados",
        parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
51
class DockerError(Exception):
    """Raised when a docker command fails or an image lookup is not unique."""
54
55
def popen_docker(cmd, *args, **kwargs):
    """Start a Docker command as a subprocess and return the Popen object.

    cmd is the list of arguments that follow the docker executable name.
    Remaining positional and keyword arguments are handed to
    subprocess.Popen.  Unless the caller overrides them, stdin is a pipe
    that is closed immediately and stdout is redirected to this process's
    stderr.

    The executable `docker.io` is tried first; if launching it raises
    OSError, `docker` is tried instead.
    """
    # We only close stdin ourselves when we were the ones who created it.
    caller_supplied_stdin = 'stdin' in kwargs
    if not caller_supplied_stdin:
        kwargs['stdin'] = subprocess.PIPE
    if 'stdout' not in kwargs:
        kwargs['stdout'] = sys.stderr
    try:
        child = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        child = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if not caller_supplied_stdin:
        child.stdin.close()
    return child
67
def check_docker(proc, description):
    """Wait for a Docker subprocess to finish and verify that it succeeded.

    proc is a Popen-like object; description names the docker subcommand
    and is only used in the error message.

    Raises DockerError if the process exited with a non-zero status.
    """
    proc.wait()
    status = proc.returncode
    if status == 0:
        return
    message = "docker {} returned status code {}".format(description, status)
    raise DockerError(message)
73
def docker_images():
    """Yield a DockerImage tuple for each locally installed Docker image.

    Runs `docker images --no-trunc` and parses its whitespace-separated
    columns: the first three words are the repository, tag and image id,
    the last two words are the size, and whatever lies in between is the
    creation time.

    Raises DockerError, once the output has been fully consumed, if the
    docker command exited with a non-zero status.
    """
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    # Ignore the header line.  Supply a default so that empty output (for
    # example when docker itself failed) does not raise StopIteration
    # inside this generator: that would either end it silently before
    # check_docker() runs, or surface as RuntimeError on newer Pythons.
    next(list_output, None)
    for line in list_output:
        words = line.split()
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
88
def find_image_hashes(image_search, image_tag=None):
    """Return the set of full hashes of installed images matching a search.

    Every installed image whose hash starts with the lower-cased
    image_search is a candidate.  If, while scanning, an image is found
    whose repository equals image_search and whose tag equals image_tag,
    a one-element set holding just that image's hash is returned
    immediately instead.

    The result is always a set: empty when nothing matches, and possibly
    holding several hashes when a hash prefix is ambiguous.
    """
    prefix = image_search.lower()
    prefix_matches = set()
    for candidate in docker_images():
        if candidate.repo == image_search and candidate.tag == image_tag:
            # An exact repository+tag match takes priority over any
            # hash-prefix matches collected so far.
            return {candidate.hash}
        if candidate.hash.startswith(prefix):
            prefix_matches.add(candidate.hash)
    return prefix_matches
104
def find_one_image_hash(image_search, image_tag=None):
    """Return the full hash of the single installed image matching a search.

    Arguments are passed straight to find_image_hashes().

    Raises DockerError when no image matches or when more than one does.
    """
    matches = find_image_hashes(image_search, image_tag)
    match_count = len(matches)
    if match_count == 0:
        raise DockerError("no matching image found")
    if match_count > 1:
        raise DockerError("{} images match {}".format(match_count, image_search))
    return matches.pop()
114
def stat_cache_name(image_file):
    """Return the path of the stat cache file that goes with image_file.

    image_file may be an object with a `name` attribute (such as an open
    file) or a plain path string; the result is that name with '.stat'
    appended.
    """
    try:
        base_name = image_file.name
    except AttributeError:
        # No name attribute: treat the argument itself as the path.
        base_name = image_file
    return base_name + '.stat'
117
def pull_image(image_name, image_tag):
    """Run `docker pull -t <image_tag> <image_name>`.

    Raises DockerError if the docker command exits with a non-zero status.
    """
    pull_proc = popen_docker(['pull', '-t', image_tag, image_name])
    check_docker(pull_proc, "pull")
120
def save_image(image_hash, image_file):
    """Write the Docker image image_hash to the open file image_file.

    Runs `docker save` with its stdout sent to image_file, then records
    the file's stat tuple as JSON in the matching '.stat' file so a later
    run can tell whether the saved image can be reused.  Failing to write
    that stat file is ignored.

    Raises DockerError if `docker save` exits with a non-zero status.
    """
    save_proc = popen_docker(['save', image_hash], stdout=image_file)
    check_docker(save_proc, "save")
    image_file.flush()
    stat_path = stat_cache_name(image_file)
    try:
        with open(stat_path, 'w') as statfile:
            file_stat = os.fstat(image_file.fileno())
            json.dump(tuple(file_stat), statfile)
    except STAT_CACHE_ERRORS:
        # Without a stat file we simply won't resume from this cache.
        pass
132
def prep_image_file(filename):
    """Open the file that will hold a saved Docker image.

    Returns a tuple (image_file, need_save).  need_save is False only
    when a previously saved copy exists in the cache directory and its
    modification time and size still match the recorded stat data; in
    that case the file is opened read-only.  Otherwise the file is opened
    for writing and the caller must save the image into it.

    When no cache directory is available, a named temporary file is
    returned and need_save is always True.
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        return tempfile.NamedTemporaryFile(suffix='.tar'), True

    file_path = os.path.join(cache_dir, filename)
    try:
        with open(stat_cache_name(file_path)) as statfile:
            recorded_stat = json.load(statfile)
        current_stat = os.stat(file_path)
        need_save = any(recorded_stat[field] != current_stat[field]
                        for field in [ST_MTIME, ST_SIZE])
    except STAT_CACHE_ERRORS + (AttributeError, IndexError):
        # The old stats are missing or unusable, so the cached copy
        # cannot be trusted.
        need_save = True
    open_mode = 'w+b' if need_save else 'rb'
    return open(file_path, open_mode), need_save
154
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create a link through the API client and return the call's result.

    The request body consists of the extra keyword arguments plus
    'link_class' and 'name' taken from link_class and link_name.
    num_retries is passed on to the request's execute() call.
    """
    body = dict(link_attrs)
    body['link_class'] = link_class
    body['name'] = link_name
    request = api_client.links().create(body=body)
    return request.execute(num_retries=num_retries)
159
def ptimestamp(t):
    """Parse a 'YYYY-MM-DDTHH:MM:SSZ' timestamp string into a datetime.

    If the string contains exactly one '.', everything from the dot up to
    (but not including) the final character is dropped before parsing,
    which discards fractional seconds while keeping the trailing 'Z'.
    """
    pieces = t.split(".")
    if len(pieces) == 2:
        whole, fraction = pieces
        t = whole + fraction[-1:]
    return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
165
def list_images_in_arv(api_client, num_retries):
    """Print a table of the Docker images recorded in Arvados.

    Fetches the links of class 'docker_image_hash' and
    'docker_image_repo+tag', groups them by the collection they point to
    (head_uuid), and prints one row per collection, newest first.  A
    collection's timestamp comes from a link's 'image_timestamp' property
    when present, otherwise from the link's 'created_at'.

    num_retries is passed on to the API request.
    """
    existing_links = api_client.links().list(
        filters=[['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']]]
        ).execute(num_retries=num_retries)['items']
    images = {}
    for link in existing_links:
        collection_uuid = link["head_uuid"]
        if collection_uuid not in images:
            # Placeholder values, shown when a collection only has one of
            # the two kinds of link.
            images[collection_uuid] = {
                "dockerhash": "<none>",
                "repo": "<none>",
                "tag": "<none>",
                "timestamp": ptimestamp("1970-01-01T00:00:01Z"),
            }
        image = images[collection_uuid]

        if link["link_class"] == "docker_image_hash":
            image["dockerhash"] = link["name"]

        if link["link_class"] == "docker_image_repo+tag":
            r = link["name"].split(":")
            image["repo"] = r[0]
            if len(r) > 1:
                image["tag"] = r[1]

        if "image_timestamp" in link["properties"]:
            image["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
        else:
            image["timestamp"] = ptimestamp(link["created_at"])

    # Newest first.  A key function with reverse=True gives the same
    # (stable) ordering as the former cmp-based sort and also works on
    # Python 3, where sorted() no longer accepts a comparison function.
    st = sorted(images.items(),
                key=lambda item: item[1]["timestamp"],
                reverse=True)

    fmt = "{:30}  {:10}  {:12}  {:29}  {:20}"
    # Single-argument print(...) behaves the same as a statement on
    # Python 2 and as a function call on Python 3.
    print(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
    for i, j in st:
        print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
199
def main(arguments=None):
    """Entry point of arv-keepdocker: upload a Docker image, or list images.

    arguments is the list of command line arguments to parse (argparse
    falls back to sys.argv when it is None).

    With no image argument, or the literal image name 'images', the images
    known to Arvados are listed and the process exits with status 0.

    Otherwise the local image is located (after an optional pull).  Unless
    --force is given, an existing upload of the same image hash is reused:
    the collection and links are made available in the target project, the
    collection UUID is printed, and the process exits with status 0.
    Failing that, the image is saved with `docker save`, uploaded through
    arv-put, and tagged with 'docker_image_hash' and (when the image was
    named by repository) 'docker_image_repo+tag' links.

    Exits with status 1 if the image cannot be identified uniquely.
    """
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        list_images_in_arv(api, args.retries)
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        sys.stderr.write("arv-keepdocker: {}\n".format(error))
        sys.exit(1)

    # Only record a repo:tag name when the image was not specified by hash.
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                if image_repo_tag:
                    # The 'in' filter is given the collections' UUIDs,
                    # not the collection records themselves.
                    existing_repo_tag = api.links().list(
                        filters=[['link_class', '=', 'docker_image_repo+tag'],
                                 ['name', '=', image_repo_tag],
                                 ['head_uuid', 'in',
                                  [c['uuid'] for c in collections]]]
                        ).execute(num_retries=args.retries)['items']
                else:
                    existing_repo_tag = []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                if owned_col:
                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                else:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid':  coll_uuid }

                if not owned_img:
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                print(coll_uuid)

                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            # A file that is already gone is fine; anything else is not.
            if error.errno != errno.ENOENT:
                raise
335
# Run the command line tool when this file is executed as a script.
if __name__ == '__main__':
    main()