13 from collections import namedtuple
17 import arvados.commands._util as arv_cmd
18 import arvados.commands.put as arv_put
# Errors that can occur while reading or writing the stat cache file.
# Any of these just means we cannot resume from the cache; see save_image.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One locally installed Docker image, as parsed from one row of
# `docker images --no-trunc` output.
DockerImage = namedtuple('DockerImage',
                         ['repo', 'tag', 'hash', 'created', 'vsize'])
# Options specific to arv-keepdocker.  This parser is also consulted by
# main() via parse_known_args() to separate our switches from arv-put's.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

# --pull and --no-pull are mutually exclusive; both write to args.pull.
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")

# Both positionals are optional: with no image argument, main() lists
# the Docker images already stored in Arvados instead of uploading.
keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")
# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
# This combined parser is what main() actually uses to parse sys.argv.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
52 class DockerError(Exception):
def popen_docker(cmd, *args, **kwargs):
    """Run a docker CLI command and return its subprocess.Popen object.

    Tries the Debian-packaged `docker.io` executable first, falling back
    to `docker` if that is not on $PATH.  Unless the caller supplies its
    own stdin, the child's stdin pipe is closed immediately so docker
    can never block waiting for input.  stdout defaults to our stderr so
    docker's progress output does not pollute our own stdout.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError if it failed.

    proc is a Popen-like object (must have .wait() and .returncode);
    description names the docker subcommand for the error message.
    Returns None on success.
    """
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_images():
    """Yield a DockerImage tuple for each locally installed image."""
    # --no-trunc gives the full 64-character image hash.
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        # Columns are: REPO TAG IMAGE_ID CREATED... SIZE SIZE_UNIT.
        # CREATED is multi-word ("2 weeks ago"), so split from both ends.
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    """Find Docker image hashes matching a search term.

    Given one argument, search for Docker images with matching hashes,
    and return their full hashes in a set (empty if nothing matches,
    more than one element if the hash prefix is ambiguous).

    Given two arguments, also search for a Docker image with the same
    repository and tag.  If one is found, return its hash in a
    one-element set; otherwise, fall back to the one-argument hash
    search.
    """
    # Docker prints image hashes in lowercase hex.
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        # An exact repo/tag match wins outright.
        if (image.repo == image_search) and (image.tag == image_tag):
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the single image hash matching the search term.

    Raises DockerError if no image matches, or if a hash-prefix search
    matches more than one image.
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the path of the stat-cache file paired with image_file.

    image_file may be a file object (its .name attribute is used) or a
    plain path string.
    """
    base_path = getattr(image_file, 'name', image_file)
    return '{}.stat'.format(base_path)
def pull_image(image_name, image_tag):
    """Pull image_name:image_tag from the Docker registry.

    Raises DockerError if the pull fails.
    """
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")
def save_image(image_hash, image_file):
    """Save the Docker image image_hash into image_file.

    After a successful `docker save`, the file's stat info is written to
    a sidecar cache file so a later run can detect the saved tar is
    still fresh and resume without re-saving (see prep_image_file).
    """
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    # Rewind so callers can immediately read the tar back.
    image_file.seek(0)
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.
def prep_image_file(filename):
    """Return (file_object, need_save) for saving a Docker image.

    The file object is ready to receive `docker save` output; need_save
    is False when a cached save in ~/.cache/arvados/docker is still
    valid (its recorded stat matches the file on disk), in which case
    the file is opened read-only.
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # Only mtime and size matter for deciding freshness.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create an Arvados link record and return the API response.

    Any extra keyword arguments (e.g. head_uuid, owner_uuid, properties)
    are passed through as fields of the new link; link_class and name
    are filled in from the positional arguments.
    """
    body = dict(link_attrs)
    body['link_class'] = link_class
    body['name'] = link_name
    return api_client.links().create(body=body).execute(num_retries=num_retries)
165 return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use). Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".

    """
    docker_image_filters = [['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']]]
    if image_name:
        image_link_name = "{}:{}".format(image_name, image_tag or 'latest')
        docker_image_filters.append(['name', '=', image_link_name])

    existing_links = api_client.links().list(
        filters=docker_image_filters
        ).execute(num_retries=num_retries)['items']
    # Merge the hash link and repo+tag link for each collection into one
    # info dict, keyed by collection UUID.
    images = {}
    for link in existing_links:
        collection_uuid = link["head_uuid"]
        if collection_uuid not in images:
            images[collection_uuid] = {"dockerhash": "<none>",
                                       "repo": "<none>",
                                       "tag": "<none>",
                                       "timestamp": ptimestamp("1970-01-01T00:00:01Z")}

        if link["link_class"] == "docker_image_hash":
            images[collection_uuid]["dockerhash"] = link["name"]

        if link["link_class"] == "docker_image_repo+tag":
            r = link["name"].split(":")
            images[collection_uuid]["repo"] = r[0]
            if len(r) > 1:
                images[collection_uuid]["tag"] = r[1]

        # Prefer the image's own creation time; fall back to link creation.
        if "image_timestamp" in link["properties"]:
            images[collection_uuid]["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
        else:
            images[collection_uuid]["timestamp"] = ptimestamp(link["created_at"])

    # Newest image first (same stable ordering as the old cmp-based sort,
    # but also valid on Python 3).
    return sorted(images.items(), key=lambda kv: kv[1]["timestamp"], reverse=True)
def main(arguments=None):
    """Upload a Docker image to Arvados, or list images already there.

    With no image argument (or the literal argument "images"), prints a
    table of Docker images stored in Arvados and exits.  Otherwise finds
    the local image, reuses an existing Arvados copy when possible
    (unless --force), or saves the image with `docker save` and uploads
    it via arv-put, tagging the resulting collection with
    docker_image_hash and docker_image_repo+tag links.

    NOTE(review): several control-flow lines were lost in extraction;
    this body restores them from the visible skeleton — confirm against
    upstream history.
    """
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    if args.image is None or args.image == 'images':
        fmt = "{:30}  {:10}  {:12}  {:29}  {:20}"
        # print(...) with a single argument works identically on
        # Python 2 and 3 (the original mixed both print forms).
        print(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED"))
        for i, j in list_images_in_arv(api, args.retries):
            print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        sys.stderr.write("arv-keepdocker: {}\n".format(error))
        sys.exit(1)

    # If the user addressed the image by hash, there is no repo:tag to record.
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    if args.name is None:
        if image_repo_tag:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
        else:
            collection_name = 'Docker image {}'.format(image_hash[0:12])
    else:
        collection_name = args.name

    if not args.force:
        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
        else:
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']
        if existing_links:
            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

            if collections:
                # check for repo+tag links on these collections
                # (filter on the collections' UUIDs — the original passed
                # the collection dicts themselves, which the filter API
                # cannot match against)
                existing_repo_tag = (api.links().list(
                    filters=[['link_class', '=', 'docker_image_repo+tag'],
                             ['name', '=', image_repo_tag],
                             ['head_uuid', 'in', [c['uuid'] for c in collections]]]
                    ).execute(num_retries=args.retries)['items']) if image_repo_tag else []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                if owned_col:
                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                else:
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid}

                if not owned_img:
                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

                print(coll_uuid)
                sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_file.seek(0)
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()

    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
    if image_repo_tag:
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up temporary files.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            # A file that is already gone is fine; anything else is real.
            if error.errno != errno.ENOENT:
                raise
352 if __name__ == '__main__':