15 from operator import itemgetter
20 import arvados.commands._util as arv_cmd
21 import arvados.commands.put as arv_put
24 from arvados._version import __version__
26 EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
27 STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
29 DockerImage = collections.namedtuple(
30 'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
# Command-line options specific to arv-keepdocker; arv-put's options are
# merged in below through argparse's `parents` mechanism.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
    help='Print version and exit.')
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")
keepdocker_parser.add_argument(
    '--force-image-format', action='store_true', default=False,
    help="Proceed even if the image format is not supported by the server")

# --pull and --no-pull may not be combined.
_group = keepdocker_parser.add_mutually_exclusive_group()
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")
keepdocker_parser.add_argument(
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
class DockerError(Exception):
    """Raised when a `docker` subcommand fails or its output is unusable."""
def popen_docker(cmd, *args, **kwargs):
    # Launch `docker <cmd...>` and return the subprocess.Popen object.
    # Tries the Debian-style executable name `docker.io` first, then
    # falls back to `docker`.  stdout defaults to our stderr so docker's
    # progress output doesn't pollute this program's stdout.
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError: # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
        # Only close the pipe we created ourselves; a caller-supplied
        # stdin is the caller's responsibility.
        docker_proc.stdin.close()
def check_docker(proc, description):
    # Raise DockerError if the docker subprocess exited nonzero;
    # `description` names the subcommand for the error message.
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_image_format(image_hash):
    """Return the registry format ('v1' or 'v2') of the given image."""
    cmd = popen_docker(['inspect', '--format={{.Id}}', image_hash],
                       stdout=subprocess.PIPE)
        image_id = next(cmd.stdout).strip()
        # v2-format images report ids like "sha256:<hex>"; v1 ids are
        # bare hex with no ':' separator.
        if image_id.startswith('sha256:'):
        elif ':' not in image_id:
        check_docker(cmd, "inspect")
def docker_image_compatible(api, image_hash):
    # Report whether the image's registry format is among the formats the
    # API server advertises in its discovery document.
    supported = api._rootDesc.get('dockerImageFormats', [])
        # Server doesn't say: warn and accept the image.
        print >>sys.stderr, "arv-keepdocker: warning: server does not specify supported image formats (see docker_image_formats in server config). Continuing."
    fmt = docker_image_format(image_hash)
        print >>sys.stderr, "arv-keepdocker: image format is {!r} " \
            "but server supports only {!r}".format(fmt, supported)
    # Yield a DockerImage tuple for each installed image.
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output) # Ignore the header line
    for line in list_output:
        # The CREATED column may span several words; SIZE is always the
        # last two, so everything between column 3 and the size is CREATED.
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    # Given one argument, search for Docker images with matching hashes,
    # and return their full hashes in a set.
    # Given two arguments, also search for a Docker image with the
    # same repository and tag. If one is found, return its hash in a
    # set; otherwise, fall back to the one-argument hash search.
    # Returns None if no match is found, or a hash search is ambiguous.
    hash_search = image_search.lower()
    for image in docker_images():
        # An exact repo+tag match wins immediately.
        if (image.repo == image_search) and (image.tag == image_tag):
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
def find_one_image_hash(image_search, image_tag=None):
    # Resolve image_search (and optional image_tag) to exactly one local
    # image hash; raise DockerError on zero or multiple matches.
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    elif hash_count == 0:
        raise DockerError("no matching image found")
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the path of the stat-cache file for image_file.

    image_file may be an open file object (its .name is used) or a
    plain path string.
    """
    base = getattr(image_file, 'name', image_file)
    return '{}.stat'.format(base)
def pull_image(image_name, image_tag):
    # Run `docker pull name:tag`; raises DockerError via check_docker
    # if the pull fails.
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
def save_image(image_hash, image_file):
    # Save the specified Docker image to image_file, then try to save its
    # stats so we can try to resume after interruption.
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
        # Record the saved file's stat so a later run can detect whether
        # the cached tar is still valid (see prep_image_file).
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass # We won't resume from this cache. No big deal.
def prep_image_file(filename):
    # Return a file object ready to save a Docker image,
    # and a boolean indicating whether or not we need to actually save the
    # image (False if a cached save is available).
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: fall back to a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        file_path = os.path.join(cache_dir, filename)
            # Compare the cached stat snapshot against the file on disk;
            # any mtime/size mismatch means the cached tar is stale.
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create a link record via the API and return the server's response.

    Extra keyword arguments (e.g. owner_uuid, head_uuid, properties)
    are merged into the link body alongside link_class and name.
    """
    body = dict(link_attrs)
    body['link_class'] = link_class
    body['name'] = link_name
    request = api_client.links().create(body=body)
    return request.execute(num_retries=num_retries)
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by
    name or image id, the API server looks for a link with the most
    recent `image_timestamp` property; then the most recent
    `created_at` timestamp.  This method generates a sort key for
    Docker metadata links to sort them from least to most preferred.
    """
        image_timestamp = ciso8601.parse_datetime_unaware(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        # Missing or unparseable property: sort before all real timestamps.
        image_timestamp = EARLIEST_DATETIME
    return (image_timestamp,
            ciso8601.parse_datetime_unaware(link['created_at']))
def _get_docker_links(api_client, num_retries, **kwargs):
    # Fetch link records (paginating via list_all), annotate each with its
    # preference sort key, and sort most-preferred first.
    links = arvados.util.list_all(api_client.links().list,
                                  num_retries, **kwargs)
        link['_sort_key'] = docker_link_sort_key(link)
    links.sort(key=itemgetter('_sort_key'), reverse=True)
def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
    # Build the listing dict for one Docker metadata link.  When the link
    # had no usable image_timestamp (its sort key starts with
    # EARLIEST_DATETIME), report the created_at timestamp (index 1) instead.
    timestamp_index = 1 if (link['_sort_key'][0] is EARLIEST_DATETIME) else 0
        '_sort_key': link['_sort_key'],
        'timestamp': link['_sort_key'][timestamp_index],
        'collection': link['head_uuid'],
        'dockerhash': dockerhash,
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use). Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".
    """
        # Find images with the name the user specified.
        search_links = _get_docker_links(
            api_client, num_retries,
            filters=[['link_class', '=', 'docker_image_repo+tag'],
                      '{}:{}'.format(image_name, image_tag or 'latest')]])
            repo_links = search_links
            # Fall back to finding images with the specified image hash.
            search_links = _get_docker_links(
                api_client, num_retries,
                filters=[['link_class', '=', 'docker_image_hash'],
                         ['name', 'ilike', image_name + '%']])
            hash_links = search_links
        # Only list information about images that were found in the search.
        search_filters.append(['head_uuid', 'in',
                               [link['head_uuid'] for link in search_links]])

    # It should be reasonable to expect that each collection only has one
    # image hash (though there may be many links specifying this). Find
    # the API server's most preferred image hash link for each collection.
    if hash_links is None:
        hash_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
    # hash_links is sorted most-preferred first; iterating reversed() lets
    # the most preferred link overwrite earlier ones in the dict.
    hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}

    # Each collection may have more than one name (though again, one name
    # may be specified more than once). Build an image listing from name
    # tags, sorted by API server preference.
    if repo_links is None:
        repo_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=',
                                       'docker_image_repo+tag']])
    seen_image_names = collections.defaultdict(set)
    for link in repo_links:
        collection_uuid = link['head_uuid']
        if link['name'] in seen_image_names[collection_uuid]:
        seen_image_names[collection_uuid].add(link['name'])
            dockerhash = hash_link_map[collection_uuid]['name']
            dockerhash = '<unknown>'
        name_parts = link['name'].split(':', 1)
        images.append(_new_image_listing(link, dockerhash, *name_parts))

    # Find any image hash links that did not have a corresponding name link,
    # and add image listings for them, retaining the API server preference
    images_start_size = len(images)
    # NOTE(review): iteritems() is Python 2 only.
    for collection_uuid, link in hash_link_map.iteritems():
        if not seen_image_names[collection_uuid]:
            images.append(_new_image_listing(link, link['name']))
    if len(images) > images_start_size:
        # Re-sort only when hash-only listings were appended out of order.
        images.sort(key=itemgetter('_sort_key'), reverse=True)

    # Remove any image listings that refer to unknown collections.
    existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
        api_client.collections().list, num_retries,
        filters=[['uuid', 'in', [im['collection'] for im in images]]],
    return [(image['collection'], image) for image in images
            if image['collection'] in existing_coll_uuids]
def items_owned_by(owner_uuid, arv_items):
    """Lazily yield the records in arv_items whose 'owner_uuid' field
    equals owner_uuid."""
    for record in arv_items:
        if record['owner_uuid'] == owner_uuid:
            yield record
def main(arguments=None, stdout=sys.stdout):
    """Command-line entry point: list Docker images stored in Arvados, or
    save a local Docker image to Keep and tag it with metadata links."""
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    # No image argument (or the literal 'images'): print a table of the
    # Docker images already known to Arvados and stop.
    if args.image is None or args.image == 'images':
        fmt = "{:30} {:10} {:12} {:29} {:20}\n"
        stdout.write(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
        for i, j in list_images_in_arv(api, args.retries):
            stdout.write(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        # NOTE(review): error.message is Python 2 only.
        print >>sys.stderr, "arv-keepdocker:", error.message

    if not docker_image_compatible(api, image_hash):
        if args.force_image_format:
            print >>sys.stderr, "arv-keepdocker: forcing incompatible image"
            print >>sys.stderr, "arv-keepdocker: refusing to store " \
                "incompatible format (use --force-image-format to override)"

    # Only record a repo:tag when the user named the image (not a bare hash).
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    if args.name is None:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
            collection_name = 'Docker image {}'.format(image_hash[0:12])
        collection_name = args.name

        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = _get_docker_links(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]])
        # get readable collections
        collections = api.collections().list(
            filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
            select=["uuid", "owner_uuid", "name", "manifest_text"]
            ).execute(num_retries=args.retries)['items']

            # check for repo+tag links on these collections
                existing_repo_tag = _get_docker_links(
                    filters=[['link_class', '=', 'docker_image_repo+tag'],
                             ['name', '=', image_repo_tag],
                             ['head_uuid', 'in', collections]])
                existing_repo_tag = []

                # Prefer a matching collection already owned by the project.
                coll_uuid = next(items_owned_by(parent_project_uuid, collections))['uuid']
            except StopIteration:
                # create new collection owned by the project
                coll_uuid = api.collections().create(
                    body={"manifest_text": collections[0]['manifest_text'],
                          "name": collection_name,
                          "owner_uuid": parent_project_uuid},
                    ensure_unique_name=True
                    ).execute(num_retries=args.retries)['uuid']

            link_base = {'owner_uuid': parent_project_uuid,
                         'head_uuid': coll_uuid,
                         'properties': existing_links[0]['properties']}

            if not any(items_owned_by(parent_project_uuid, existing_links)):
                # create image link owned by the project
                make_link(api, args.retries,
                          'docker_image_hash', image_hash, **link_base)

            if image_repo_tag and not any(items_owned_by(parent_project_uuid, existing_repo_tag)):
                # create repo+tag link owned by the project
                make_link(api, args.retries, 'docker_image_repo+tag',
                          image_repo_tag, **link_base)

            stdout.write(coll_uuid + "\n")

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name], stdout=stdout).strip()

    # Read the image metadata and make Arvados links from it.
        image_tar = tarfile.open(fileobj=image_file)
        # v2 hashes look like "sha256:<hex>"; the metadata file name inside
        # the tar differs between the two layouts.
        image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
            json_filename = raw_image_hash + '.json'
            json_filename = raw_image_hash + '/json'
        json_file = image_tar.extractfile(image_tar.getmember(json_filename))
        image_metadata = json.load(json_file)

    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Best-effort cleanup of the cached tar and its stat file.
    for filename in [stat_cache_name(image_file), image_file.name]:
        except OSError as error:
            if error.errno != errno.ENOENT:
476 if __name__ == '__main__':