15 from operator import itemgetter
20 import arvados.commands._util as arv_cmd
21 import arvados.commands.put as arv_put
# Errors that can occur while reading/writing the image-stat cache file;
# all of them are treated as "no usable cache" rather than fatal.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One row of `docker images` output: repository name, tag, image hash,
# creation time string, and virtual size string.
DockerImage = collections.namedtuple(
    'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
# Argument parser for keepdocker's own options.  add_help=False because this
# parser is also used as a parent of arg_parser below, which supplies --help.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

# --pull and --no-pull are mutually exclusive; both write to args.pull.
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")

keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")
# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
56 class DockerError(Exception):
def popen_docker(cmd, *args, **kwargs):
    """Run a docker subcommand and return the subprocess.Popen object.

    `cmd` is the argument list passed to docker (e.g. ['pull', 'foo:latest']).
    Extra *args/**kwargs go straight to subprocess.Popen.  Unless the caller
    supplied its own stdin, the child's stdin pipe is closed immediately so
    docker never blocks waiting for input.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        # Debian historically installed the binary as docker.io; prefer it.
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        # We own the stdin pipe we defaulted above; close it now.
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError on failure.

    `description` names the subcommand (e.g. "pull") for the error message.
    """
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_images():
    """Yield a DockerImage tuple for each installed image."""
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        # The CREATED column is free text ("3 days ago") and VIRTUAL SIZE is
        # two tokens ("123 MB"), so split from both ends of the word list.
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    # Given one argument, search for Docker images with matching hashes,
    # and return their full hashes in a set.
    # Given two arguments, also search for a Docker image with the
    # same repository and tag.  If one is found, return its hash in a
    # set; otherwise, fall back to the one-argument hash search.
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        if (image.repo == image_search) and (image.tag == image_tag):
            # Exact repo:tag match wins outright.
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the single local image hash matching the search terms.

    Raises DockerError if no image matches or the search is ambiguous.
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the stat-cache path for an image file (path string or file object)."""
    return getattr(image_file, 'name', image_file) + '.stat'
def pull_image(image_name, image_tag):
    """Pull image_name:image_tag from the Docker registry; raise DockerError on failure."""
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")
def save_image(image_hash, image_file):
    # Save the specified Docker image to image_file, then try to save its
    # stats so we can try to resume after interruption.
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    image_file.flush()
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.
def prep_image_file(filename):
    # Return a file object ready to save a Docker image,
    # and a boolean indicating whether or not we need to actually save the
    # image (False if a cached save is available).
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # The cached save is valid only if mtime and size are unchanged.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create an Arvados link record and return the API response.

    Any extra keyword arguments (e.g. head_uuid, owner_uuid, properties)
    are merged into the link body alongside link_class and name.
    """
    link_attrs.update({'link_class': link_class, 'name': link_name})
    return api_client.links().create(body=link_attrs).execute(
        num_retries=num_retries)
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by
    name or image id, the API server looks for a link with the most
    recent `image_timestamp` property; then the most recent
    `created_at` timestamp.  This method generates a sort key for
    Docker metadata links to sort them from least to most preferred.
    """
    try:
        image_timestamp = ciso8601.parse_datetime_unaware(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        # Missing or unparseable image_timestamp: sort as the earliest
        # possible time so created_at alone decides preference.
        # NOTE(review): top of file not visible here; using a local import
        # rather than assuming a module-level earliest-datetime constant.
        import datetime
        image_timestamp = datetime.datetime.min
    return (image_timestamp,
            ciso8601.parse_datetime_unaware(link['created_at']))
def _get_docker_links(api_client, num_retries, **kwargs):
    """Fetch link records and return them sorted most-preferred first.

    Each returned link gains a '_sort_key' entry (see docker_link_sort_key)
    so callers can re-sort merged lists with the same ordering.
    """
    links = arvados.util.list_all(api_client.links().list,
                                  num_retries, **kwargs)
    for link in links:
        link['_sort_key'] = docker_link_sort_key(link)
    links.sort(key=itemgetter('_sort_key'), reverse=True)
    return links
190 def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
192 '_sort_key': link['_sort_key'],
193 'timestamp': link['_sort_key'][0] or link['_sort_key'][1],
194 'collection': link['head_uuid'],
195 'dockerhash': dockerhash,
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use).  Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".
    """
    search_filters = []
    repo_links = None
    hash_links = None
    if image_name:
        # Find images with the name the user specified.
        search_links = _get_docker_links(
            api_client, num_retries,
            filters=[['link_class', '=', 'docker_image_repo+tag'],
                     ['name', '=',
                      '{}:{}'.format(image_name, image_tag or 'latest')]])
        if search_links:
            repo_links = search_links
        else:
            # Fall back to finding images with the specified image hash.
            search_links = _get_docker_links(
                api_client, num_retries,
                filters=[['link_class', '=', 'docker_image_hash'],
                         ['name', 'ilike', image_name + '%']])
            hash_links = search_links
        # Only list information about images that were found in the search.
        search_filters.append(['head_uuid', 'in',
                               [link['head_uuid'] for link in search_links]])

    # It should be reasonable to expect that each collection only has one
    # image hash (though there may be many links specifying this).  Find
    # the API server's most preferred image hash link for each collection.
    if hash_links is None:
        hash_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
    # reversed() so that the most-preferred link wins the dict insertion.
    hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}

    # Each collection may have more than one name (though again, one name
    # may be specified more than once).  Build an image listing from name
    # tags, sorted by API server preference.
    if repo_links is None:
        repo_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=',
                                       'docker_image_repo+tag']])
    seen_image_names = collections.defaultdict(set)
    images = []
    for link in repo_links:
        collection_uuid = link['head_uuid']
        if link['name'] in seen_image_names[collection_uuid]:
            continue
        seen_image_names[collection_uuid].add(link['name'])
        try:
            dockerhash = hash_link_map[collection_uuid]['name']
        except KeyError:
            dockerhash = '<unknown>'
        name_parts = link['name'].split(':', 1)
        images.append(_new_image_listing(link, dockerhash, *name_parts))

    # Find any image hash links that did not have a corresponding name link,
    # and add image listings for them, retaining the API server preference
    # sort order.
    images_start_size = len(images)
    # .items() (not Python-2-only iteritems) keeps this portable.
    for collection_uuid, link in hash_link_map.items():
        if not seen_image_names[collection_uuid]:
            images.append(_new_image_listing(link, link['name']))
    if len(images) > images_start_size:
        images.sort(key=itemgetter('_sort_key'), reverse=True)

    # Remove any image listings that refer to unknown collections.
    existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
            api_client.collections().list, num_retries,
            filters=[['uuid', 'in', [im['collection'] for im in images]]],
            select=['uuid'])}
    return [(image['collection'], image) for image in images
            if image['collection'] in existing_coll_uuids]
def main(arguments=None):
    """Upload a Docker image to Arvados, or list the images stored there.

    With no image argument (or the literal word 'images'), prints a table
    of Docker images known to the API server and exits.  Otherwise finds
    the local image, saves it to a tar file, uploads it with arv-put, and
    creates docker_image_hash / docker_image_repo+tag links so the API
    server can resolve the image later.
    """
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        fmt = "{:30} {:10} {:12} {:29} {:20}"
        # print() call form works on both Python 2 and 3 for a single arg
        # (the original mixed a Python-2 print statement with print() calls).
        print(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
        for i, j in list_images_in_arv(api, args.retries):
            print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i,
                             j["timestamp"].strftime("%c")))
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        # Portable stderr write (replaces Python-2-only `print >>sys.stderr`
        # and the deprecated `error.message`).
        sys.stderr.write("arv-keepdocker: {}\n".format(error))
        sys.exit(1)

    # Only record a repo:tag name if the user searched by name, not by hash.
    image_repo_tag = ('{}:{}'.format(args.image, args.tag)
                      if not image_hash.startswith(args.image.lower()) else None)

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                if image_repo_tag:
                    existing_repo_tag = api.links().list(
                        filters=[['link_class', '=', 'docker_image_repo+tag'],
                                 ['name', '=', image_repo_tag],
                                 # BUGFIX: compare against collection UUIDs,
                                 # not the collection records themselves.
                                 ['head_uuid', 'in',
                                  [c['uuid'] for c in collections]]]
                        ).execute(num_retries=args.retries)['items']
                else:
                    existing_repo_tag = []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                if owned_col:
                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                else:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid}

                if not owned_img:
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                print(coll_uuid)
                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up the saved image and its stat cache.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            # A missing file is fine; anything else is a real problem.
            if error.errno != errno.ENOENT:
                raise
423 if __name__ == '__main__':