1 from builtins import next
16 from operator import itemgetter
21 import arvados.commands._util as arv_cmd
22 import arvados.commands.put as arv_put
23 from arvados.collection import CollectionReader
28 from arvados._version import __version__
# Module-level logger; ARVADOS_DEBUG in the user's config turns on debug output.
logger = logging.getLogger('arvados.keepdocker')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)
# Sentinel timestamp used when a Docker link carries no usable image_timestamp.
EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
# Errors that can occur while reading or writing the image stat cache file.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# Lightweight record describing one locally-installed Docker image.
DockerImage = collections.namedtuple(
    'DockerImage', 'repo tag hash created vsize')
# Options specific to arv-keepdocker; arv-put's shared options are merged in
# via the combined arg_parser below.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
    help='Print version and exit.')
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")
keepdocker_parser.add_argument(
    '--force-image-format', action='store_true', default=False,
    help="Proceed even if the image format is not supported by the server")

# --pull and --no-pull are mutually exclusive; both set args.pull.
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")
keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")
# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
# parents= re-uses those parsers' arguments without re-declaring them here.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
class DockerError(Exception):
    """Raised when a `docker` CLI invocation fails or gives unusable output."""
    pass
def popen_docker(cmd, *args, **kwargs):
    """Start `docker <cmd...>` via subprocess.Popen and return the process.

    Tries the `docker.io` executable first (Debian's historical name for
    the binary), falling back to `docker`.  Unless the caller supplied its
    own stdin, stdin is a pipe that is closed immediately so docker can
    never block waiting for input.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError if it failed.

    `description` names the docker subcommand ("pull", "save", ...) so the
    error message identifies what went wrong.
    """
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_image_format(image_hash):
    """Return the registry format ('v1' or 'v2') of the given image.

    Returns None when the id scheme reported by `docker inspect` is not
    recognized.
    """
    cmd = popen_docker(['inspect', '--format={{.Id}}', image_hash],
                       stdout=subprocess.PIPE)
    try:
        image_id = next(cmd.stdout).decode().strip()
        if image_id.startswith('sha256:'):
            return 'v2'
        elif ':' not in image_id:
            return 'v1'
        else:
            return None
    finally:
        # Always reap the inspect process, even if parsing failed.
        check_docker(cmd, "inspect")
def docker_image_compatible(api, image_hash):
    """Return True if the API server can accept this image's registry format.

    When the server does not advertise any supported formats, assume
    compatibility and only log a warning.
    """
    supported = api._rootDesc.get('dockerImageFormats', [])
    if not supported:
        logger.warning("server does not specify supported image formats (see docker_image_formats in server config).")
        return True

    fmt = docker_image_format(image_hash)
    if fmt in supported:
        return True
    else:
        logger.error("image format is {!r} " \
                     "but server supports only {!r}".format(fmt, supported))
        return False
def docker_images():
    # Yield a DockerImage tuple for each installed image.
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        # NOTE(review): on Python 3 these are bytes tokens; verify callers
        # compare against bytes or that stdout is decoded upstream.
        # The CREATED column may contain spaces ("2 weeks ago"): everything
        # between the first three columns and the final size column is CREATED.
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    # Given one argument, search for Docker images with matching hashes,
    # and return their full hashes in a set.
    # Given two arguments, also search for a Docker image with the
    # same repository and tag.  If one is found, return its hash in a
    # set; otherwise, fall back to the one-argument hash search.
    # Returns an empty set if no match is found; more than one element
    # means the hash search was ambiguous.
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        if (image.repo == image_search) and (image.tag == image_tag):
            # Exact repo:tag match wins outright.
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the single local image hash matching the search terms.

    Raises DockerError when no image matches, or when the search is
    ambiguous (more than one hash matches).
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the path of the stat-cache file paired with image_file.

    Accepts either an open file object (its .name is used) or a plain
    path string.
    """
    path = getattr(image_file, 'name', image_file)
    return path + '.stat'
def pull_image(image_name, image_tag):
    """Pull image_name:image_tag from the Docker registry.

    Raises DockerError if the pull fails.
    """
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")
def save_image(image_hash, image_file):
    # Save the specified Docker image to image_file, then try to save its
    # stats so we can try to resume after interruption.
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    # Flush so the stat cache below records the fully-written file.
    image_file.flush()
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.
def prep_image_file(filename):
    # Return a file object ready to save a Docker image,
    # and a boolean indicating whether or not we need to actually save the
    # image (False if a cached save is available).
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # Re-save only if the cached tar changed since its stats were recorded.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create an Arvados link record and return the API server's response.

    Extra keyword arguments become additional link attributes
    (e.g. head_uuid, owner_uuid, properties).
    """
    body = dict(link_attrs, link_class=link_class, name=link_name)
    request = api_client.links().create(body=body)
    return request.execute(num_retries=num_retries)
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by
    name or image id, the API server looks for a link with the most
    recent `image_timestamp` property; then the most recent
    `created_at` timestamp.  This method generates a sort key for
    Docker metadata links to sort them from least to most preferred.
    """
    try:
        image_timestamp = ciso8601.parse_datetime_unaware(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        # Missing or unparseable timestamp sorts before everything else.
        image_timestamp = EARLIEST_DATETIME
    return (image_timestamp,
            ciso8601.parse_datetime_unaware(link['created_at']))
def _get_docker_links(api_client, num_retries, **kwargs):
    """List Docker metadata links, most preferred first.

    Extra keyword arguments (e.g. filters=) are passed to the links.list
    API call.  Each returned link gains a '_sort_key' entry computed by
    docker_link_sort_key.
    """
    links = arvados.util.list_all(api_client.links().list,
                                  num_retries, **kwargs)
    for link in links:
        link['_sort_key'] = docker_link_sort_key(link)
    links.sort(key=itemgetter('_sort_key'), reverse=True)
    return links
def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
    """Build one image-listing dict from a Docker metadata link.

    If the link had no usable image_timestamp (sort key slot 0 is the
    EARLIEST_DATETIME sentinel), report the link's created_at time instead.
    """
    timestamp_index = 1 if (link['_sort_key'][0] is EARLIEST_DATETIME) else 0
    return {
        '_sort_key': link['_sort_key'],
        'timestamp': link['_sort_key'][timestamp_index],
        'collection': link['head_uuid'],
        'dockerhash': dockerhash,
        'repo': repo,
        'tag': tag,
        }
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use). Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".

    """
    search_filters = []
    repo_links = None
    hash_links = None
    if image_name:
        # Find images with the name the user specified.
        search_links = _get_docker_links(
            api_client, num_retries,
            filters=[['link_class', '=', 'docker_image_repo+tag'],
                     ['name', '=',
                      '{}:{}'.format(image_name, image_tag or 'latest')]])
        if search_links:
            repo_links = search_links
        else:
            # Fall back to finding images with the specified image hash.
            search_links = _get_docker_links(
                api_client, num_retries,
                filters=[['link_class', '=', 'docker_image_hash'],
                         ['name', 'ilike', image_name + '%']])
            hash_links = search_links
        # Only list information about images that were found in the search.
        search_filters.append(['head_uuid', 'in',
                               [link['head_uuid'] for link in search_links]])

    # It should be reasonable to expect that each collection only has one
    # image hash (though there may be many links specifying this).  Find
    # the API server's most preferred image hash link for each collection.
    if hash_links is None:
        hash_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
    hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}

    # Each collection may have more than one name (though again, one name
    # may be specified more than once).  Build an image listing from name
    # tags, sorted by API server preference.
    if repo_links is None:
        repo_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=',
                                       'docker_image_repo+tag']])
    seen_image_names = collections.defaultdict(set)
    images = []
    for link in repo_links:
        collection_uuid = link['head_uuid']
        if link['name'] in seen_image_names[collection_uuid]:
            continue
        seen_image_names[collection_uuid].add(link['name'])
        try:
            dockerhash = hash_link_map[collection_uuid]['name']
        except KeyError:
            dockerhash = '<unknown>'
        name_parts = link['name'].split(':', 1)
        images.append(_new_image_listing(link, dockerhash, *name_parts))

    # Find any image hash links that did not have a corresponding name link,
    # and add image listings for them, retaining the API server preference
    # sort order.
    images_start_size = len(images)
    for collection_uuid, link in hash_link_map.items():
        if not seen_image_names[collection_uuid]:
            images.append(_new_image_listing(link, link['name']))
    if len(images) > images_start_size:
        images.sort(key=itemgetter('_sort_key'), reverse=True)

    # Remove any image listings that refer to unknown collections.
    existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
        api_client.collections().list, num_retries,
        filters=[['uuid', 'in', [im['collection'] for im in images]]],
        select=['uuid'])}
    return [(image['collection'], image) for image in images
            if image['collection'] in existing_coll_uuids]
def items_owned_by(owner_uuid, arv_items):
    """Lazily yield the items whose 'owner_uuid' field equals owner_uuid."""
    for item in arv_items:
        if item['owner_uuid'] == owner_uuid:
            yield item
334 def _uuid2pdh(api, uuid):
335 return api.collections().list(
336 filters=[['uuid', '=', uuid]],
337 select=['portable_data_hash'],
338 ).execute()['items'][0]['portable_data_hash']
def main(arguments=None, stdout=sys.stdout):
    """Command-line entry point.

    With no image argument (or the literal word 'images'), list the Docker
    images known to the API server.  Otherwise, save the named local image
    and upload it to Keep (unless an identical copy already exists and
    --force was not given), then tag the resulting collection with
    docker_image_hash / docker_image_repo+tag links.
    """
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        fmt = "{:30} {:10} {:12} {:29} {:20}\n"
        stdout.write(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
        try:
            for i, j in list_images_in_arv(api, args.retries):
                stdout.write(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
        except IOError as e:
            # A broken pipe (e.g. piping to `head`) is not an error.
            if e.errno == errno.EPIPE:
                pass
            else:
                raise
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        # BUGFIX: Exception objects have no .message attribute on Python 3;
        # use str(error) so the failure is reported instead of crashing.
        logger.error(str(error))
        sys.exit(1)

    if not docker_image_compatible(api, image_hash):
        if args.force_image_format:
            logger.warning("forcing incompatible image")
        else:
            logger.error("refusing to store " \
                         "incompatible format (use --force-image-format to override)")
            sys.exit(1)

    # Remember the repo:tag only if the user named the image by it (not by hash).
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = _get_docker_links(
            api, args.retries,
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]])
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                if image_repo_tag:
                    existing_repo_tag = _get_docker_links(
                        api, args.retries,
                        filters=[['link_class', '=', 'docker_image_repo+tag'],
                                 ['name', '=', image_repo_tag],
                                 ['head_uuid', 'in', [c["uuid"] for c in collections]]])
                else:
                    existing_repo_tag = []

                try:
                    coll_uuid = next(items_owned_by(parent_project_uuid, collections))['uuid']
                except StopIteration:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid,
                             'properties': existing_links[0]['properties']}

                if not any(items_owned_by(parent_project_uuid, existing_links)):
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if image_repo_tag and not any(items_owned_by(parent_project_uuid, existing_repo_tag)):
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                stdout.write(coll_uuid + "\n")

                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name], stdout=stdout).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
    if image_hash_type:
        # v2-style id ("sha256:..."): metadata lives in <hash>.json.
        json_filename = raw_image_hash + '.json'
    else:
        # v1-style id: metadata lives in <hash>/json.
        json_filename = raw_image_hash + '/json'
    json_file = image_tar.extractfile(image_tar.getmember(json_filename))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up temporary files.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            if error.errno != errno.ENOENT:
                raise
496 if __name__ == '__main__':