15 from operator import itemgetter
20 import arvados.commands._util as arv_cmd
21 import arvados.commands.put as arv_put
24 from arvados._version import __version__
# Sentinel timestamp: sorts before any real image_timestamp, used as the
# fallback sort key in docker_link_sort_key below.
26 EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
# Exceptions that can occur while reading/writing the .stat cache file;
# all of them are treated as "no usable cache".
27 STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One parsed row of `docker images --no-trunc` output (see docker_images()).
29 DockerImage = collections.namedtuple(
30 'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])

# keepdocker-specific options. add_help=False because this parser is used
# as a parent of arg_parser below, which supplies -h/--help itself.
32 keepdocker_parser = argparse.ArgumentParser(add_help=False)
33 keepdocker_parser.add_argument(
34 '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
35 help='Print version and exit.')
36 keepdocker_parser.add_argument(
37 '-f', '--force', action='store_true', default=False,
38 help="Re-upload the image even if it already exists on the server")

# --pull and --no-pull are mutually exclusive; both store into args.pull.
# NOTE(review): the _group.add_argument( call lines are not visible in this
# excerpt; the option bodies below belong to _group.
40 _group = keepdocker_parser.add_mutually_exclusive_group()
42 '--pull', action='store_true', default=False,
43 help="Try to pull the latest image from Docker registry")
45 '--no-pull', action='store_false', dest='pull',
46 help="Use locally installed image only, don't pull image from Docker registry (default)")

# Positional arguments: image (name or hash; its add_argument line for the
# name itself is not visible here) and an optional tag.
48 keepdocker_parser.add_argument(
50 help="Docker image to upload, as a repository name or hash")
51 keepdocker_parser.add_argument(
52 'tag', nargs='?', default='latest',
53 help="Tag of the Docker image to upload (default 'latest')")

55 # Combine keepdocker options listed above with run_opts options of arv-put.
56 # The options inherited from arv-put include --name, --project-uuid,
57 # --progress/--no-progress/--batch-progress and --resume/--no-resume.
58 arg_parser = argparse.ArgumentParser(
59 description="Upload or list Docker images in Arvados",
60 parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
# Raised for docker CLI failures and image-lookup problems in this module
# (see check_docker / find_one_image_hash). Class body not visible here.
62 class DockerError(Exception):
# Launch the docker CLI with the given subcommand list; returns the Popen.
66 def popen_docker(cmd, *args, **kwargs):
# Remember whether the caller supplied stdin; if not, we own the pipe and
# close it immediately (line 75) so docker never blocks waiting for input.
67 manage_stdin = ('stdin' not in kwargs)
68 kwargs.setdefault('stdin', subprocess.PIPE)
# Route docker's progress chatter to stderr so it can't pollute our stdout.
69 kwargs.setdefault('stdout', sys.stderr)
# Prefer the Debian-style 'docker.io' binary, fall back to plain 'docker'.
# (The try: line is elided from this excerpt.)
71 docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
72 except OSError: # No docker.io in $PATH
73 docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
75 docker_proc.stdin.close()
# Wait for a docker subprocess (the proc.wait() line is elided from this
# excerpt) and raise DockerError if it exited with a nonzero status.
# `description` names the docker subcommand for the error message.
78 def check_docker(proc, description):
80 if proc.returncode != 0:
81 raise DockerError("docker {} returned status code {}".
82 format(description, proc.returncode))
# NOTE(review): this is the body of the docker_images() generator; its def
# line and the `words = line.split()` line are not visible in this excerpt.
85 # Yield a DockerImage tuple for each installed image.
86 list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
87 list_output = iter(list_proc.stdout)
88 next(list_output) # Ignore the header line
89 for line in list_output:
# The CREATED column contains spaces ("2 weeks ago"), so rejoin the middle
# words; the final two words form the SIZE column.
91 size_index = len(words) - 2
92 repo, tag, imageid = words[:3]
93 ctime = ' '.join(words[3:size_index])
94 vsize = ' '.join(words[size_index:])
95 yield DockerImage(repo, tag, imageid, ctime, vsize)
96 list_proc.stdout.close()
# Raises DockerError if `docker images` itself failed.
97 check_docker(list_proc, "images")
99 def find_image_hashes(image_search, image_tag=None):
100 # Given one argument, search for Docker images with matching hashes,
101 # and return their full hashes in a set.
102 # Given two arguments, also search for a Docker image with the
103 # same repository and tag. If one is found, return its hash in a
104 # set; otherwise, fall back to the one-argument hash search.
105 # Returns None if no match is found, or a hash search is ambiguous.
# Hash prefixes are matched case-insensitively against the lowercased input.
106 hash_search = image_search.lower()
# (The hash_matches set initialization and final return are elided here.)
108 for image in docker_images():
# An exact repo:tag match wins immediately; otherwise accumulate every
# locally installed hash that starts with the search prefix.
109 if (image.repo == image_search) and (image.tag == image_tag):
110 return set([image.hash])
111 elif image.hash.startswith(hash_search):
112 hash_matches.add(image.hash)
# Resolve image_search (repo name or hash prefix) to exactly one image
# hash. Raises DockerError when nothing matches or the match is ambiguous.
# (The hash_count == 1 success branch is elided from this excerpt.)
115 def find_one_image_hash(image_search, image_tag=None):
116 hashes = find_image_hashes(image_search, image_tag)
117 hash_count = len(hashes)
120 elif hash_count == 0:
121 raise DockerError("no matching image found")
123 raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    # Return the path of the stat cache file associated with image_file.
    # Accepts either an open file object (its .name attribute is used)
    # or a plain path string.
    base_name = getattr(image_file, 'name', image_file)
    return base_name + '.stat'
# Run `docker pull image_name:image_tag`; check_docker raises DockerError
# if the pull fails. (The trailing description argument line is elided.)
128 def pull_image(image_name, image_tag):
129 check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
132 def save_image(image_hash, image_file):
133 # Save the specified Docker image to image_file, then try to save its
134 # stats so we can try to resume after interruption.
135 check_docker(popen_docker(['save', image_hash], stdout=image_file),
# Cache the saved file's os.fstat tuple next to it (see stat_cache_name);
# prep_image_file compares it later to decide whether a re-save is needed.
# (The flush and try: lines are elided from this excerpt.)
139 with open(stat_cache_name(image_file), 'w') as statfile:
140 json.dump(tuple(os.fstat(image_file.fileno())), statfile)
# Best-effort only: any stat-cache failure merely disables resuming.
141 except STAT_CACHE_ERRORS:
142 pass # We won't resume from this cache. No big deal.
144 def prep_image_file(filename):
145 # Return a file object ready to save a Docker image,
146 # and a boolean indicating whether or not we need to actually save the
147 # image (False if a cached save is available).
148 cache_dir = arv_cmd.make_home_conf_dir(
149 os.path.join('.cache', 'arvados', 'docker'), 0o700)
# No usable cache directory: fall back to a throwaway temp file, which
# always requires a fresh save. (The need_save assignment for this branch
# is elided from this excerpt.)
150 if cache_dir is None:
151 image_file = tempfile.NamedTemporaryFile(suffix='.tar')
154 file_path = os.path.join(cache_dir, filename)
# Compare the stat tuple cached by save_image against the file's current
# mtime/size; any mismatch or read error forces a re-save. (try: elided.)
156 with open(stat_cache_name(file_path)) as statfile:
157 prev_stat = json.load(statfile)
158 now_stat = os.stat(file_path)
159 need_save = any(prev_stat[field] != now_stat[field]
160 for field in [ST_MTIME, ST_SIZE])
161 except STAT_CACHE_ERRORS + (AttributeError, IndexError):
162 need_save = True # We couldn't compare against old stats
# w+b when we must rewrite the image; rb when the cached copy is reusable.
163 image_file = open(file_path, 'w+b' if need_save else 'rb')
164 return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    # Create an Arvados link record of the given class and name and return
    # the created record. Extra keyword arguments become additional link
    # attributes (e.g. head_uuid, owner_uuid, properties).
    link_attrs['link_class'] = link_class
    link_attrs['name'] = link_name
    create_request = api_client.links().create(body=link_attrs)
    return create_request.execute(num_retries=num_retries)
171 def docker_link_sort_key(link):
172 """Build a sort key to find the latest available Docker image.
174 To find one source collection for a Docker image referenced by
175 name or image id, the API server looks for a link with the most
176 recent `image_timestamp` property; then the most recent
177 `created_at` timestamp. This method generates a sort key for
178 Docker metadata links to sort them from least to most preferred.
# A missing or unparseable image_timestamp property gets the
# EARLIEST_DATETIME sentinel, so it sorts before every real timestamp.
# (The try: line is elided from this excerpt.)
181 image_timestamp = ciso8601.parse_datetime_unaware(
182 link['properties']['image_timestamp'])
183 except (KeyError, ValueError):
184 image_timestamp = EARLIEST_DATETIME
# Secondary key: the link record's own creation time.
185 return (image_timestamp,
186 ciso8601.parse_datetime_unaware(link['created_at']))
# Fetch all link records matching **kwargs filters, decorate each with a
# '_sort_key' (see docker_link_sort_key), and sort most-preferred first.
# (The loop header and return statement are elided from this excerpt.)
188 def _get_docker_links(api_client, num_retries, **kwargs):
189 links = arvados.util.list_all(api_client.links().list,
190 num_retries, **kwargs)
192 link['_sort_key'] = docker_link_sort_key(link)
193 links.sort(key=itemgetter('_sort_key'), reverse=True)
# Build one image-listing dict from a link record previously decorated by
# _get_docker_links. (The `return {` line and the dict's repo/tag entries
# are elided from this excerpt.)
196 def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
# If the link had no real image_timestamp (its sort key starts with the
# EARLIEST_DATETIME sentinel), report the link's created_at time instead.
197 timestamp_index = 1 if (link['_sort_key'][0] is EARLIEST_DATETIME) else 0
199 '_sort_key': link['_sort_key'],
200 'timestamp': link['_sort_key'][timestamp_index],
201 'collection': link['head_uuid'],
202 'dockerhash': dockerhash,
207 def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
208 """List all Docker images known to the api_client with image_name and
209 image_tag. If no image_name is given, defaults to listing all
212 Returns a list of tuples representing matching Docker images,
213 sorted in preference order (i.e. the first collection in the list
214 is the one that the API server would use). Each tuple is a
215 (collection_uuid, collection_info) pair, where collection_info is
216 a dict with fields "dockerhash", "repo", "tag", and "timestamp".
# NOTE(review): the initializations of search_filters, hash_links,
# repo_links and images, plus several control-flow lines, are elided from
# this excerpt; comments below describe the visible logic only.
223 # Find images with the name the user specified.
224 search_links = _get_docker_links(
225 api_client, num_retries,
226 filters=[['link_class', '=', 'docker_image_repo+tag'],
228 '{}:{}'.format(image_name, image_tag or 'latest')]])
230 repo_links = search_links
232 # Fall back to finding images with the specified image hash.
233 search_links = _get_docker_links(
234 api_client, num_retries,
235 filters=[['link_class', '=', 'docker_image_hash'],
236 ['name', 'ilike', image_name + '%']])
237 hash_links = search_links
238 # Only list information about images that were found in the search.
239 search_filters.append(['head_uuid', 'in',
240 [link['head_uuid'] for link in search_links]])
242 # It should be reasonable to expect that each collection only has one
243 # image hash (though there may be many links specifying this). Find
244 # the API server's most preferred image hash link for each collection.
245 if hash_links is None:
246 hash_links = _get_docker_links(
247 api_client, num_retries,
248 filters=search_filters + [['link_class', '=', 'docker_image_hash']])
# reversed() so that, for duplicate head_uuids, the most-preferred link
# (sorted first by _get_docker_links) wins the dict insertion.
249 hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}
251 # Each collection may have more than one name (though again, one name
252 # may be specified more than once). Build an image listing from name
253 # tags, sorted by API server preference.
254 if repo_links is None:
255 repo_links = _get_docker_links(
256 api_client, num_retries,
257 filters=search_filters + [['link_class', '=',
258 'docker_image_repo+tag']])
259 seen_image_names = collections.defaultdict(set)
261 for link in repo_links:
262 collection_uuid = link['head_uuid']
# Skip duplicate repo:tag names pointing at the same collection.
263 if link['name'] in seen_image_names[collection_uuid]:
265 seen_image_names[collection_uuid].add(link['name'])
267 dockerhash = hash_link_map[collection_uuid]['name']
269 dockerhash = '<unknown>'
# Split "repo:tag" into at most two parts for _new_image_listing.
270 name_parts = link['name'].split(':', 1)
271 images.append(_new_image_listing(link, dockerhash, *name_parts))
273 # Find any image hash links that did not have a corresponding name link,
274 # and add image listings for them, retaining the API server preference
276 images_start_size = len(images)
# NOTE(review): .iteritems() is Python 2-only syntax.
277 for collection_uuid, link in hash_link_map.iteritems():
278 if not seen_image_names[collection_uuid]:
279 images.append(_new_image_listing(link, link['name']))
# Re-sort only if the hash-only listings actually added anything.
280 if len(images) > images_start_size:
281 images.sort(key=itemgetter('_sort_key'), reverse=True)
283 # Remove any image listings that refer to unknown collections.
284 existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
285 api_client.collections().list, num_retries,
286 filters=[['uuid', 'in', [im['collection'] for im in images]]],
288 return [(image['collection'], image) for image in images
289 if image['collection'] in existing_coll_uuids]
def items_owned_by(owner_uuid, arv_items):
    # Lazily yield only the Arvados records in arv_items whose
    # 'owner_uuid' field equals owner_uuid.
    for item in arv_items:
        if item['owner_uuid'] == owner_uuid:
            yield item
# CLI entry point: list images in Arvados, or upload one Docker image and
# create its docker_image_hash / docker_image_repo+tag metadata links.
# NOTE(review): many lines of this function (try:/else:/return statements,
# blank-line separators) are elided from this excerpt.
294 def main(arguments=None, stdout=sys.stdout):
295 args = arg_parser.parse_args(arguments)
296 api = arvados.api('v1')
# List mode: no image argument, or the literal word "images".
298 if args.image is None or args.image == 'images':
299 fmt = "{:30} {:10} {:12} {:29} {:20}\n"
300 stdout.write(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
301 for i, j in list_images_in_arv(api, args.retries):
302 stdout.write(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
305 # Pull the image if requested, unless the image is specified as a hash
306 # that we already have.
307 if args.pull and not find_image_hashes(args.image):
308 pull_image(args.image, args.tag)
311 image_hash = find_one_image_hash(args.image, args.tag)
# NOTE(review): Python 2-only syntax below (print >> and error.message).
312 except DockerError as error:
313 print >>sys.stderr, "arv-keepdocker:", error.message
# Only record a repo:tag when the user gave a name, not a bare hash prefix.
316 image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None
# Default collection name from the repo:tag and abbreviated hash.
318 if args.name is None:
320 collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
322 collection_name = 'Docker image {}'.format(image_hash[0:12])
324 collection_name = args.name
327 # Check if this image is already in Arvados.
329 # Project where everything should be owned
330 if args.project_uuid:
331 parent_project_uuid = args.project_uuid
333 parent_project_uuid = api.users().current().execute(
334 num_retries=args.retries)['uuid']
336 # Find image hash tags
337 existing_links = _get_docker_links(
339 filters=[['link_class', '=', 'docker_image_hash'],
340 ['name', '=', image_hash]])
342 # get readable collections
343 collections = api.collections().list(
344 filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
345 select=["uuid", "owner_uuid", "name", "manifest_text"]
346 ).execute(num_retries=args.retries)['items']
349 # check for repo+tag links on these collections
351 existing_repo_tag = _get_docker_links(
353 filters=[['link_class', '=', 'docker_image_repo+tag'],
354 ['name', '=', image_repo_tag],
# NOTE(review): `collections` here is a list of record dicts; presumably
# this filter should use their 'uuid' values — confirm against API usage.
355 ['head_uuid', 'in', collections]])
357 existing_repo_tag = []
# Reuse a matching collection already owned by the project, if any.
360 coll_uuid = next(items_owned_by(parent_project_uuid, collections))['uuid']
361 except StopIteration:
362 # create new collection owned by the project
363 coll_uuid = api.collections().create(
364 body={"manifest_text": collections[0]['manifest_text'],
365 "name": collection_name,
366 "owner_uuid": parent_project_uuid},
367 ensure_unique_name=True
368 ).execute(num_retries=args.retries)['uuid']
370 link_base = {'owner_uuid': parent_project_uuid,
371 'head_uuid': coll_uuid,
372 'properties': existing_links[0]['properties']}
374 if not any(items_owned_by(parent_project_uuid, existing_links)):
375 # create image link owned by the project
376 make_link(api, args.retries,
377 'docker_image_hash', image_hash, **link_base)
379 if image_repo_tag and not any(items_owned_by(parent_project_uuid, existing_repo_tag)):
380 # create repo+tag link owned by the project
381 make_link(api, args.retries, 'docker_image_repo+tag',
382 image_repo_tag, **link_base)
384 stdout.write(coll_uuid + "\n")
388 # Open a file for the saved image, and write it if needed.
389 outfile_name = '{}.tar'.format(image_hash)
390 image_file, need_save = prep_image_file(outfile_name)
392 save_image(image_hash, image_file)
394 # Call arv-put with switches we inherited from it
395 # (a.k.a., switches that aren't our own).
396 put_args = keepdocker_parser.parse_known_args(arguments)[1]
397 # Propagate our computed collection name unless the user set one.
398 if args.name is None:
399 put_args += ['--name', collection_name]
401 coll_uuid = arv_put.main(
402 put_args + ['--filename', outfile_name, image_file.name], stdout=stdout).strip()
404 # Read the image metadata and make Arvados links from it.
406 image_tar = tarfile.open(fileobj=image_file)
# image_hash may be "type:hex" (e.g. sha256:...) or bare hex; the tar
# member holding the metadata differs between the two layouts.
407 image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
409 json_filename = raw_image_hash + '.json'
411 json_filename = raw_image_hash + '/json'
412 json_file = image_tar.extractfile(image_tar.getmember(json_filename))
413 image_metadata = json.load(json_file)
416 link_base = {'head_uuid': coll_uuid, 'properties': {}}
# The image's creation time becomes the image_timestamp link property,
# which docker_link_sort_key uses for preference ordering.
417 if 'created' in image_metadata:
418 link_base['properties']['image_timestamp'] = image_metadata['created']
419 if args.project_uuid is not None:
420 link_base['owner_uuid'] = args.project_uuid
422 make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
424 make_link(api, args.retries,
425 'docker_image_repo+tag', image_repo_tag, **link_base)
# Clean up the cached tar and its stat file; ENOENT is ignored since the
# files may legitimately not exist.
429 for filename in [stat_cache_name(image_file), image_file.name]:
432 except OSError as error:
433 if error.errno != errno.ENOENT:
436 if __name__ == '__main__':