import argparse
import datetime
import errno
import json
import os
import subprocess
import sys
import tarfile
import tempfile

from collections import namedtuple
from stat import ST_MTIME, ST_SIZE

import arvados
import arvados.commands._util as arv_cmd
import arvados.commands.put as arv_put
# Errors that can occur while reading or writing the ".stat" cache file.
# All are treated as non-fatal: the caller just re-saves the image.
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)

# One parsed row of `docker images --no-trunc` output.
DockerImage = namedtuple('DockerImage',
                         ['repo', 'tag', 'hash', 'created', 'vsize'])
# Options specific to arv-keepdocker.  Kept in a separate parser (with
# add_help=False) so main() can use parse_known_args to strip them out and
# forward the remaining switches to arv-put.
opt_parser = argparse.ArgumentParser(add_help=False)
opt_parser.add_argument(
    '-f', '--force', action='store_true', default=False,
    help="Re-upload the image even if it already exists on the server")
opt_parser.add_argument(
    '--project-uuid',
    help="Add the Docker image and metadata to the specified project. Goes into user 'home' project by default.")
opt_parser.add_argument(
    '--name',
    help="Name to use for the collection that will contain the docker image.")
_group = opt_parser.add_mutually_exclusive_group()
_group.add_argument(
    '--pull', action='store_true', default=False,
    help="Try to pull the latest image from Docker registry")
_group.add_argument(
    '--no-pull', action='store_false', dest='pull',
    help="Use locally installed image only, don't pull image from Docker registry (default)")
opt_parser.add_argument(
    'image', nargs='?',
    help="Docker image to upload, as a repository name or hash")
opt_parser.add_argument(
    'tag', nargs='?', default='latest',
    help="Tag of the Docker image to upload (default 'latest')")

# Full parser: our options plus the ones inherited from arv-put.
arg_parser = argparse.ArgumentParser(
    description="Upload or list Docker images in Arvados",
    parents=[opt_parser, arv_put.run_opts])
56 class DockerError(Exception):
def popen_docker(cmd, *args, **kwargs):
    """Run a docker subcommand and return the subprocess.Popen object.

    cmd is the docker argument list (e.g. ['pull', name]).  Tries the
    Debian-style 'docker.io' binary first, then falls back to 'docker'.
    Unless the caller supplied its own stdin, the child's stdin pipe is
    closed immediately so docker never waits for input.
    """
    manage_stdin = ('stdin' not in kwargs)
    kwargs.setdefault('stdin', subprocess.PIPE)
    kwargs.setdefault('stdout', sys.stderr)
    try:
        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
    except OSError:  # No docker.io in $PATH
        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
    if manage_stdin:
        docker_proc.stdin.close()
    return docker_proc
def check_docker(proc, description):
    """Wait for a docker subprocess and raise DockerError if it failed.

    proc must have wait() and returncode; description names the docker
    subcommand for the error message.
    """
    proc.wait()  # returncode is only set after the process is reaped
    if proc.returncode != 0:
        raise DockerError("docker {} returned status code {}".
                          format(description, proc.returncode))
def docker_images():
    # Yield a DockerImage tuple for each installed image.
    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
    list_output = iter(list_proc.stdout)
    next(list_output)  # Ignore the header line
    for line in list_output:
        # Columns: REPOSITORY TAG IMAGE-ID CREATED... SIZE-VALUE SIZE-UNIT.
        # CREATED may contain spaces ("2 weeks ago"), so take everything
        # between the first three words and the last two.
        # NOTE(review): the pipe yields bytes on Python 3 — confirm callers
        # expect that, or decode here.
        words = line.split()
        size_index = len(words) - 2
        repo, tag, imageid = words[:3]
        ctime = ' '.join(words[3:size_index])
        vsize = ' '.join(words[size_index:])
        yield DockerImage(repo, tag, imageid, ctime, vsize)
    list_proc.stdout.close()
    check_docker(list_proc, "images")
def find_image_hashes(image_search, image_tag=None):
    # Given one argument, search for Docker images with matching hashes,
    # and return their full hashes in a set.
    # Given two arguments, also search for a Docker image with the
    # same repository and tag. If one is found, return its hash in a
    # set; otherwise, fall back to the one-argument hash search.
    # Returns an empty set if no match is found.
    hash_search = image_search.lower()
    hash_matches = set()
    for image in docker_images():
        if (image.repo == image_search) and (image.tag == image_tag):
            # Exact repo:tag match wins outright.
            return set([image.hash])
        elif image.hash.startswith(hash_search):
            hash_matches.add(image.hash)
    return hash_matches
def find_one_image_hash(image_search, image_tag=None):
    """Return the unique image hash matching the search terms.

    Raises DockerError when zero or more than one image matches.
    """
    hashes = find_image_hashes(image_search, image_tag)
    hash_count = len(hashes)
    if hash_count == 1:
        return hashes.pop()
    elif hash_count == 0:
        raise DockerError("no matching image found")
    else:
        raise DockerError("{} images match {}".format(hash_count, image_search))
def stat_cache_name(image_file):
    """Return the stat-cache path for image_file (a file object or a path string)."""
    try:
        base_path = image_file.name
    except AttributeError:
        base_path = image_file
    return base_path + '.stat'
def pull_image(image_name, image_tag):
    """Pull image_name with image_tag from the Docker registry.

    Raises DockerError if the pull fails.
    """
    pull_proc = popen_docker(['pull', '-t', image_tag, image_name])
    check_docker(pull_proc, "pull")
def save_image(image_hash, image_file):
    # Save the specified Docker image to image_file, then try to save its
    # stats so we can try to resume after interruption.
    check_docker(popen_docker(['save', image_hash], stdout=image_file),
                 "save")
    image_file.flush()  # make fstat below see the final size
    try:
        with open(stat_cache_name(image_file), 'w') as statfile:
            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
    except STAT_CACHE_ERRORS:
        pass  # We won't resume from this cache. No big deal.
def prep_image_file(filename):
    # Return a file object ready to save a Docker image,
    # and a boolean indicating whether or not we need to actually save the
    # image (False if a cached save is available).
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        # No usable cache directory: save into a throwaway temp file.
        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            # The cached save is reusable only if mtime and size are unchanged.
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def make_link(link_class, link_name, **link_attrs):
    """Create an Arvados link of the given class and name.

    Extra keyword arguments become additional link attributes.  Returns
    the API server's response for the created link.
    """
    body = dict(link_attrs)
    body['link_class'] = link_class
    body['name'] = link_name
    return arvados.api('v1').links().create(body=body).execute()
167 return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
def list_images_in_arv():
    """Print a table of Docker images stored in Arvados, newest first.

    Joins docker_image_hash and docker_image_repo+tag links by their
    head collection UUID.
    """
    existing_links = arvados.api('v1').links().list(
        filters=[['link_class', 'in',
                  ['docker_image_hash', 'docker_image_repo+tag']]]
        ).execute()['items']
    images = {}
    for link in existing_links:
        collection_uuid = link["head_uuid"]
        if collection_uuid not in images:
            images[collection_uuid] = {"dockerhash": "<none>",
                                       "repo": "<none>",
                                       "tag": "<none>",
                                       "timestamp": ptimestamp("1970-01-01T00:00:01Z")}

        if link["link_class"] == "docker_image_hash":
            images[collection_uuid]["dockerhash"] = link["name"]

        if link["link_class"] == "docker_image_repo+tag":
            r = link["name"].split(":")
            images[collection_uuid]["repo"] = r[0]
            if len(r) > 1:
                images[collection_uuid]["tag"] = r[1]

        if "image_timestamp" in link["properties"]:
            images[collection_uuid]["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
        else:
            images[collection_uuid]["timestamp"] = ptimestamp(link["created_at"])

    # Sort newest first (key= replaces the Python-2-only cmp-based sort).
    st = sorted(images.items(), key=lambda kv: kv[1]["timestamp"], reverse=True)

    fmt = "{:30} {:10} {:12} {:38} {:20}"
    print(fmt.format("REPOSITORY", "TAG", "IMAGE ID", "KEEP LOCATOR", "CREATED"))
    for i, j in st:
        print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:11], i, j["timestamp"].strftime("%c")))
def main(arguments=None):
    """Entry point: upload a Docker image to Arvados, or list stored images.

    With no image argument (or the literal 'images'), prints the table of
    images already in Arvados and exits.  Otherwise saves the image via
    `docker save`, uploads it with arv-put, and records metadata links.
    """
    args = arg_parser.parse_args(arguments)

    if args.image is None or args.image == 'images':
        list_images_in_arv()
        sys.exit(0)

    # Pull the image if requested, unless the image is specified as a hash
    # that we already have.
    if args.pull and not find_image_hashes(args.image):
        pull_image(args.image, args.tag)

    try:
        image_hash = find_one_image_hash(args.image, args.tag)
    except DockerError as error:
        print("arv-keepdocker:", error, file=sys.stderr)
        sys.exit(1)

    if not args.force:
        # Abort if this image is already in Arvados.
        existing_links = arvados.api('v1').links().list(
            filters=[['link_class', '=', 'docker_image_hash'],
                     ['name', '=', image_hash]]).execute()['items']
        if existing_links:
            message = [
                "arv-keepdocker: Image {} already stored in collection(s):".
                format(image_hash)]
            message.extend(link['head_uuid'] for link in existing_links)
            print("\n".join(message), file=sys.stderr)
            sys.exit(0)

    # Open a file for the saved image, and write it if needed.
    outfile_name = '{}.tar'.format(image_hash)
    image_file, need_save = prep_image_file(outfile_name)
    if need_save:
        save_image(image_hash, image_file)

    # Call arv-put with switches we inherited from it
    # (a.k.a., switches that aren't our own).
    put_args = opt_parser.parse_known_args(arguments)[1]

    if args.name is None:
        put_args += ['--name', 'Docker image {}:{} {}'.format(args.image, args.tag, image_hash[0:11])]
    else:
        put_args += ['--name', args.name]

    if args.project_uuid is not None:
        put_args += ['--project-uuid', args.project_uuid]

    coll_uuid = arv_put.main(
        put_args + ['--filename', outfile_name, image_file.name]).strip()

    # Read the image metadata and make Arvados links from it.
    image_tar = tarfile.open(fileobj=image_file)
    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
    image_metadata = json.load(json_file)
    json_file.close()
    image_tar.close()
    link_base = {'head_uuid': coll_uuid, 'properties': {}}
    if 'created' in image_metadata:
        link_base['properties']['image_timestamp'] = image_metadata['created']
    if args.project_uuid is not None:
        link_base['owner_uuid'] = args.project_uuid

    make_link('docker_image_hash', image_hash, **link_base)
    # Skip the repo+tag link when the "repository" was really a hash prefix.
    if not image_hash.startswith(args.image.lower()):
        make_link('docker_image_repo+tag', '{}:{}'.format(args.image, args.tag),
                  **link_base)

    # Clean up: remove the saved tarball and its stat cache; a missing
    # file (ENOENT) is fine, anything else is a real error.
    image_file.close()
    for filename in [stat_cache_name(image_file), image_file.name]:
        try:
            os.unlink(filename)
        except OSError as error:
            if error.errno != errno.ENOENT:
                raise
279 if __name__ == '__main__':