import argparse
import collections
import datetime
import errno
import json
import os
import subprocess
import sys
import tarfile
import tempfile

from operator import itemgetter
from stat import ST_MTIME, ST_SIZE

import ciso8601

import arvados
import arvados.util
import arvados.commands._util as arv_cmd
import arvados.commands.put as arv_put
# Oldest representable datetime; used as the fallback sort key when a Docker
# metadata link has no usable image_timestamp property.
EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)

# Errors that may occur while reading or writing the stat cache; all are
# non-fatal because the cache only speeds up resuming an interrupted save.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One locally installed Docker image, as parsed from `docker images` output.
DockerImage = collections.namedtuple(
    'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
# Argument parser for options specific to arv-keepdocker; shared as a parent
# parser so arv-put's own options can be combined with it below.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")

keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
57 class DockerError(Exception):
def popen_docker(cmd, *args, **kwargs):
    """Start a docker CLI subprocess and return the Popen object.

    Tries the `docker.io` binary first (Debian's historical name), falling
    back to `docker`.  Unless the caller supplied its own stdin, the child's
    stdin pipe is closed immediately so docker never waits for input.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError if it failed.

    `description` names the docker subcommand for the error message.
    """
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_images():
    """Yield a DockerImage tuple for each locally installed image.

    Parses the tabular output of `docker images --no-trunc`; the creation
    time and virtual size columns may themselves contain spaces, so the
    row is split from both ends around the fixed leading columns.
    """
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        # Last two words are the virtual size (e.g. "1.234 GB").
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    """Search local Docker images and return matching full hashes as a set.

    Given one argument, search for Docker images with matching hash
    prefixes, and return their full hashes in a set.
    Given two arguments, also search for a Docker image with the
    same repository and tag.  If one is found, return its hash in a
    set; otherwise, fall back to the one-argument hash search.
    """
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        # An exact repo:tag match wins immediately over prefix matches.
        if (image.repo == image_search) and (image.tag == image_tag):
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the single local image hash matching the search terms.

    Raises DockerError if no image matches, or if the search is
    ambiguous (more than one hash matches).
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the stat-cache path for image_file (a file object or a path)."""
    base_path = getattr(image_file, 'name', image_file)
    return base_path + '.stat'
def pull_image(image_name, image_tag):
    """Pull image_name:image_tag from the Docker registry; raise DockerError on failure."""
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")
def save_image(image_hash, image_file):
    """Save a Docker image to image_file, and cache its stats for resume.

    Runs `docker save` with stdout directed at image_file, then writes the
    file's stat tuple next to it so a later run can detect whether the
    cached tarball is still usable.
    """
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    image_file.flush()
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.
def prep_image_file(filename):
    """Return (file object, need_save) for saving a Docker image.

    The file object is ready to save a Docker image; the boolean
    indicates whether we actually need to save it (False if a cached
    save with matching stats is already available).
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory; save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # The cached save is valid only if mtime and size are unchanged.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create an Arvados link record and return the API response dict."""
    body = dict(link_attrs, link_class=link_class, name=link_name)
    request = api_client.links().create(body=body)
    return request.execute(num_retries=num_retries)
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by
    name or image id, the API server looks for a link with the most
    recent `image_timestamp` property; then the most recent
    `created_at` timestamp.  This method generates a sort key for
    Docker metadata links to sort them from least to most preferred.
    """
    try:
        image_timestamp = ciso8601.parse_datetime_unaware(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        # No parsable image_timestamp: sort behind every dated link.
        image_timestamp = EARLIEST_DATETIME
    return (image_timestamp,
            ciso8601.parse_datetime_unaware(link['created_at']))
def _get_docker_links(api_client, num_retries, **kwargs):
    """List Docker metadata links, sorted most-preferred first.

    Each returned link dict gains a '_sort_key' entry (see
    docker_link_sort_key) so callers can re-sort merged lists.
    """
    links = arvados.util.list_all(api_client.links().list,
                                  num_retries, **kwargs)
    for link in links:
        link['_sort_key'] = docker_link_sort_key(link)
    links.sort(key=itemgetter('_sort_key'), reverse=True)
    return links
def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
    """Build one image-listing dict from a metadata link.

    Prefers the image_timestamp part of the sort key; falls back to the
    link's created_at timestamp when no image_timestamp was available.
    """
    timestamp_index = 1 if (link['_sort_key'][0] is EARLIEST_DATETIME) else 0
    return {
        '_sort_key': link['_sort_key'],
        'timestamp': link['_sort_key'][timestamp_index],
        'collection': link['head_uuid'],
        'dockerhash': dockerhash,
        'repo': repo,
        'tag': tag,
        }
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use). Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".
    """
    search_filters = []
    repo_links = None
    hash_links = None
    if image_name:
        # Find images with the name the user specified.
        search_links = _get_docker_links(
            api_client, num_retries,
            filters=[['link_class', '=', 'docker_image_repo+tag'],
                     ['name', '=',
                      '{}:{}'.format(image_name, image_tag or 'latest')]])
        if search_links:
            repo_links = search_links
        else:
            # Fall back to finding images with the specified image hash.
            search_links = _get_docker_links(
                api_client, num_retries,
                filters=[['link_class', '=', 'docker_image_hash'],
                         ['name', 'ilike', image_name + '%']])
            hash_links = search_links
        # Only list information about images that were found in the search.
        search_filters.append(['head_uuid', 'in',
                               [link['head_uuid'] for link in search_links]])

    # It should be reasonable to expect that each collection only has one
    # image hash (though there may be many links specifying this).  Find
    # the API server's most preferred image hash link for each collection.
    if hash_links is None:
        hash_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
    # Reversed iteration means the most-preferred link wins the map slot.
    hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}

    # Each collection may have more than one name (though again, one name
    # may be specified more than once).  Build an image listing from name
    # tags, sorted by API server preference.
    if repo_links is None:
        repo_links = _get_docker_links(
            api_client, num_retries,
            filters=search_filters + [['link_class', '=',
                                       'docker_image_repo+tag']])
    seen_image_names = collections.defaultdict(set)
    images = []
    for link in repo_links:
        collection_uuid = link['head_uuid']
        if link['name'] in seen_image_names[collection_uuid]:
            continue
        seen_image_names[collection_uuid].add(link['name'])
        try:
            dockerhash = hash_link_map[collection_uuid]['name']
        except KeyError:
            dockerhash = '<unknown>'
        name_parts = link['name'].split(':', 1)
        images.append(_new_image_listing(link, dockerhash, *name_parts))

    # Find any image hash links that did not have a corresponding name link,
    # and add image listings for them, retaining the API server preference
    # sort order.
    images_start_size = len(images)
    # NOTE: items() (not py2-only iteritems()) keeps this py2/py3-compatible.
    for collection_uuid, link in hash_link_map.items():
        if not seen_image_names[collection_uuid]:
            images.append(_new_image_listing(link, link['name']))
    if len(images) > images_start_size:
        images.sort(key=itemgetter('_sort_key'), reverse=True)

    # Remove any image listings that refer to unknown collections.
    existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
            api_client.collections().list, num_retries,
            filters=[['uuid', 'in', [im['collection'] for im in images]]],
            select=['uuid'])}
    return [(image['collection'], image) for image in images
            if image['collection'] in existing_coll_uuids]
def main(arguments=None, stdout=sys.stdout):
    """Entry point: list images in Arvados, or upload one Docker image.

    With no image argument (or the literal word 'images'), prints a table
    of Docker images known to Arvados.  Otherwise finds the named local
    image, reuses an existing Arvados collection when possible (unless
    --force), or saves the image and uploads it via arv-put, creating the
    docker_image_hash / docker_image_repo+tag links either way.
    """
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        fmt = "{:30} {:10} {:12} {:29} {:20}\n"
        stdout.write(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
        for i, j in list_images_in_arv(api, args.retries):
            stdout.write(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        # NOTE: sys.stderr.write (not py2-only `print >>`) and str(error)
        # (not the deprecated .message attribute) keep this portable.
        sys.stderr.write("arv-keepdocker: {}\n".format(error))
        sys.exit(1)

    # Only record a repo:tag name if the user searched by name, not by hash.
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                # BUGFIX: the 'in' filter needs collection uuids, not the
                # collection dicts themselves.
                existing_repo_tag = (api.links().list(
                    filters=[['link_class', '=', 'docker_image_repo+tag'],
                             ['name', '=', image_repo_tag],
                             ['head_uuid', 'in',
                              [c['uuid'] for c in collections]]]
                    ).execute(num_retries=args.retries)['items']) if image_repo_tag else []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                if owned_col:
                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                else:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid}

                if not owned_img:
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                stdout.write(coll_uuid + "\n")
                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up the cached tarball and its stat cache.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            if error.errno != errno.ENOENT:
                raise
425 if __name__ == '__main__':