14 from collections import namedtuple
18 import arvados.commands._util as arv_cmd
19 import arvados.commands.put as arv_put
# Errors that can occur while reading or comparing the saved-image stat
# cache; callers treat any of these as "no usable cache".
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One row of `docker images --no-trunc` output: repository name, tag,
# full image hash, creation-time string, and virtual-size string.
DockerImage = namedtuple('DockerImage',
    ['repo', 'tag', 'hash', 'created', 'vsize'])
# Options specific to arv-keepdocker.  add_help=False because this parser
# is reused below as a parent of arg_parser, which supplies --help itself.
keepdocker_parser = argparse.ArgumentParser(add_help=False)
keepdocker_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")

# --pull and --no-pull are mutually exclusive; both store into args.pull.
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")
# Both positionals are optional: with no image argument, main() lists
# images already stored in Arvados instead of uploading.
keepdocker_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
keepdocker_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Combine keepdocker options listed above with run_opts options of arv-put.
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
class DockerError(Exception):
    """Error running a docker subcommand or resolving a Docker image.

    Raised when a docker CLI invocation exits nonzero, or when an image
    search finds no match or an ambiguous match.
    """
def popen_docker(cmd, *args, **kwargs):
    """Start a docker subcommand and return its subprocess.Popen object.

    Tries the Debian-style `docker.io` executable first, falling back to
    plain `docker` if that isn't on $PATH.  Unless the caller supplies
    stdin/stdout, the child's stdin is a pipe (closed immediately so the
    child doesn't wait for input) and its stdout goes to our stderr so
    docker's progress output can't pollute our own stdout.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        # We created the stdin pipe ourselves; close it so the child
        # sees EOF instead of blocking on input.
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError if it failed.

    description names the subcommand for the error message (e.g. "pull",
    "save", "images").  Returns None on success.
    """
    # Must wait before reading returncode; it is None until the child exits.
    proc.wait()
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_images():
    """Yield a DockerImage tuple for each locally installed image.

    Parses `docker images --no-trunc` output; raises DockerError (via
    check_docker) if the docker command fails.
    """
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        words = line.split()
        # The creation time ("N days ago") and the virtual size each span
        # several whitespace-separated words; the size is always the last
        # two words, the creation time everything between hash and size.
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    """Search locally installed Docker images and return a set of hashes.

    Given one argument, treat it as a hash prefix and return the full
    hashes of every image whose hash starts with it (lowercased).
    Given two arguments, first look for an image with exactly that
    repository and tag; if one is found, return a one-element set with
    its hash, otherwise fall back to the hash-prefix search.

    Returns an empty set when nothing matches; a set with more than one
    element means the hash search was ambiguous.
    """
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        if (image.repo == image_search) and (image.tag == image_tag):
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the single local image hash matching the search terms.

    Raises DockerError when no image matches or when the search is
    ambiguous (more than one hash matches).
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the stat-cache path for image_file (a path string or an
    open file object with a .name attribute)."""
    base_path = getattr(image_file, 'name', image_file)
    return '{}.stat'.format(base_path)
def pull_image(image_name, image_tag):
    """Pull image_name:image_tag from the Docker registry.

    Raises DockerError (via check_docker) if the pull fails.
    """
    check_docker(popen_docker(['pull', '{}:{}'.format(image_name, image_tag)]),
                 "pull")
def save_image(image_hash, image_file):
    """Save the specified Docker image to image_file, then cache its stats.

    The stat cache lets a later run (see prep_image_file) recognize an
    intact previously saved image and skip re-saving after interruption.
    Failure to write the cache is deliberately ignored.
    """
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    # Rewind so callers can read the tar back from the start.
    image_file.seek(0)
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache.  No big deal.
def prep_image_file(filename):
    """Return (file_object, need_save) for saving a Docker image.

    The file object is ready to receive the image tar.  need_save is
    False when a previously saved copy exists in the cache directory and
    its current stats match the stat cache written by save_image, so the
    caller can reuse it instead of re-running `docker save`.
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # Compare mtime and size against the cached os.fstat tuple.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
    """Create an Arvados link with the given class and name.

    Any extra keyword arguments become additional fields of the link
    body (e.g. head_uuid, owner_uuid, properties).  Returns the API
    server's response for the created link.
    """
    body = dict(link_attrs, link_class=link_class, name=link_name)
    request = api_client.links().create(body=body)
    return request.execute(num_retries=num_retries)
    # Parse an ISO-8601 UTC timestamp such as "1970-01-01T00:00:01Z" into a
    # naive datetime.
    # NOTE(review): only this return line of ptimestamp is visible in this
    # chunk; the preceding lines presumably normalize fractional seconds out
    # of t before this strptime — confirm against the full source.
    return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
    """List all Docker images known to the api_client with image_name and
    image_tag.  If no image_name is given, defaults to listing all
    Docker images.

    Returns a list of tuples representing matching Docker images,
    sorted in preference order (i.e. the first collection in the list
    is the one that the API server would use). Each tuple is a
    (collection_uuid, collection_info) pair, where collection_info is
    a dict with fields "dockerhash", "repo", "tag", and "timestamp".
    """
    docker_image_filters = [['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']]]
    if image_name:
        image_link_name = "{}:{}".format(image_name, image_tag or 'latest')
        docker_image_filters.append(['name', '=', image_link_name])

    existing_links = api_client.links().list(
        filters=docker_image_filters
        ).execute(num_retries=num_retries)['items']

    # Merge hash links and repo+tag links per collection; start every
    # collection with placeholder values and the epoch as timestamp.
    images = {}
    for link in existing_links:
        collection_uuid = link["head_uuid"]
        if collection_uuid not in images:
            images[collection_uuid] = {"dockerhash": "<none>",
                                       "repo": "<none>",
                                       "tag": "<none>",
                                       "timestamp": ptimestamp("1970-01-01T00:00:01Z")}

        if link["link_class"] == "docker_image_hash":
            images[collection_uuid]["dockerhash"] = link["name"]

        if link["link_class"] == "docker_image_repo+tag":
            r = link["name"].split(":")
            images[collection_uuid]["repo"] = r[0]
            # Guard against a bare repo name with no ":tag" suffix.
            if len(r) > 1:
                images[collection_uuid]["tag"] = r[1]

        # Prefer the image's own timestamp; fall back to link creation time.
        if "image_timestamp" in link["properties"]:
            images[collection_uuid]["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
        else:
            images[collection_uuid]["timestamp"] = ptimestamp(link["created_at"])

    # Newest image first.  key=/reverse= replaces the former Python-2-only
    # cmp() comparator and preserves the same (stable, descending) order.
    return sorted(images.items(), key=lambda item: item[1]["timestamp"], reverse=True)
def main(arguments=None):
    # Command-line entry point: with no image argument, list Docker images
    # already stored in Arvados; otherwise save the local image and upload
    # it with arv-put, creating docker_image_hash / docker_image_repo+tag
    # links so Arvados can find it later.
    args = arg_parser.parse_args(arguments)
    api = arvados.api('v1')

    # List mode: no image argument, or the literal word 'images'.
    if args.image is None or args.image == 'images':
        fmt = "{:30} {:10} {:12} {:29} {:20}"
        # NOTE(review): Python-2 print statement here but print() call two
        # lines below — inconsistent; both should use the function form.
        print fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED")
        for i, j in list_images_in_arv(api, args.retries):
            print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

        # Resolve the image argument to exactly one local image hash.
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        # NOTE(review): the matching try: is not visible in this chunk.
        # error.message is deprecated; str(error) is the portable spelling.
        print >>sys.stderr, "arv-keepdocker:", error.message

    # Record a repo:tag name only when the argument was a name rather than
    # a (prefix of the) hash itself.
    image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None

    # Collection name: explicit --name wins; otherwise derive from the
    # repo:tag (when known) and/or the abbreviated hash.
    if args.name is None:
            collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12])
            collection_name = 'Docker image {}'.format(image_hash[0:12])
        collection_name = args.name

        # Check if this image is already in Arvados.

        # Project where everything should be owned
        if args.project_uuid:
            parent_project_uuid = args.project_uuid
            # Default owner: the current user's home project.
            parent_project_uuid = api.users().current().execute(
                num_retries=args.retries)['uuid']

        # Find image hash tags
        existing_links = api.links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]
            ).execute(num_retries=args.retries)['items']

            # get readable collections
            collections = api.collections().list(
                filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
                select=["uuid", "owner_uuid", "name", "manifest_text"]
                ).execute(num_retries=args.retries)['items']

                # check for repo+tag links on these collections
                # NOTE(review): 'collections' is a list of dicts here, but the
                # head_uuid filter looks like it wants uuids — verify this
                # shouldn't be [c['uuid'] for c in collections].
                existing_repo_tag = (api.links().list(
                    filters=[['link_class', '=', 'docker_image_repo+tag'],
                             ['name', '=', image_repo_tag],
                             ['head_uuid', 'in', collections]]
                    ).execute(num_retries=args.retries)['items']) if image_repo_tag else []

                # Filter on elements owned by the parent project
                owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
                owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid]
                owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid]

                    # already have a collection owned by this project
                    coll_uuid = owned_col[0]['uuid']
                    # create new collection owned by the project
                    coll_uuid = api.collections().create(
                        body={"manifest_text": collections[0]['manifest_text'],
                              "name": collection_name,
                              "owner_uuid": parent_project_uuid},
                        ensure_unique_name=True
                        ).execute(num_retries=args.retries)['uuid']

                link_base = {'owner_uuid': parent_project_uuid,
                             'head_uuid': coll_uuid }

                    # create image link owned by the project
                    make_link(api, args.retries,
                              'docker_image_hash', image_hash, **link_base)

                if not owned_rep and image_repo_tag:
                    # create repo+tag link owned by the project
                    make_link(api, args.retries, 'docker_image_repo+tag',
                              image_repo_tag, **link_base)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = keepdocker_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', collection_name]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link(api, args.retries, 'docker_image_hash', image_hash, **link_base)
        make_link(api, args.retries,
                  'docker_image_repo+tag', image_repo_tag, **link_base)

    # Clean up the saved tar and its stat cache; a missing file is fine.
    for filename in [stat_cache_name(image_file), image_file.name]:
        except OSError as error:
            if error.errno != errno.ENOENT:
# Run as a script: delegate straight to main() with sys.argv.
if __name__ == '__main__':
    main()