X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9bbb935dc521f16b2b2b3d7c99653fa20914d90d..HEAD:/sdk/python/arvados/commands/keepdocker.py diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py index c8f6e78087..188f8be457 100644 --- a/sdk/python/arvados/commands/keepdocker.py +++ b/sdk/python/arvados/commands/keepdocker.py @@ -2,39 +2,36 @@ # # SPDX-License-Identifier: Apache-2.0 -from builtins import next import argparse import collections import datetime import errno +import fcntl import json +import logging import os import re +import subprocess import sys import tarfile import tempfile -import shutil -import _strptime -import fcntl + +import ciso8601 from operator import itemgetter +from pathlib import Path from stat import * -if os.name == "posix" and sys.version_info[0] < 3: - import subprocess32 as subprocess -else: - import subprocess - import arvados +import arvados.config import arvados.util import arvados.commands._util as arv_cmd import arvados.commands.put as arv_put -from arvados.collection import CollectionReader -import ciso8601 -import logging -import arvados.config - from arvados._version import __version__ +from typing import ( + Callable, +) + logger = logging.getLogger('arvados.keepdocker') logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG') else logging.INFO) @@ -189,9 +186,12 @@ def save_image(image_hash, image_file): except STAT_CACHE_ERRORS: pass # We won't resume from this cache. No big deal. -def get_cache_dir(): - return arv_cmd.make_home_conf_dir( - os.path.join('.cache', 'arvados', 'docker'), 0o700) +def get_cache_dir( + mkparent: Callable[[], Path]=arvados.util._BaseDirectories('CACHE').storage_path, +) -> str: + path = mkparent() / 'docker' + path.mkdir(mode=0o700, exist_ok=True) + return str(path) def prep_image_file(filename): # Return a file object ready to save a Docker image, @@ -240,8 +240,9 @@ def docker_link_sort_key(link): return (image_timestamp, created_timestamp) def _get_docker_links(api_client, num_retries, **kwargs): - links = arvados.util.list_all(api_client.links().list, - num_retries, **kwargs) + links = list(arvados.util.keyset_list_all( + api_client.links().list, num_retries=num_retries, **kwargs, + )) for link in links: link['_sort_key'] = docker_link_sort_key(link) links.sort(key=itemgetter('_sort_key'), reverse=True) @@ -258,7 +259,7 @@ def _new_image_listing(link, dockerhash, repo='', tag=''): 'tag': tag, } -def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None): +def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, project_uuid=None): """List all Docker images known to the api_client with image_name and image_tag. If no image_name is given, defaults to listing all Docker images. @@ -273,13 +274,18 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) search_filters = [] repo_links = None hash_links = None + + project_filter = [] + if project_uuid is not None: + project_filter = [["owner_uuid", "=", project_uuid]] + if image_name: # Find images with the name the user specified. search_links = _get_docker_links( api_client, num_retries, filters=[['link_class', '=', 'docker_image_repo+tag'], ['name', '=', - '{}:{}'.format(image_name, image_tag or 'latest')]]) + '{}:{}'.format(image_name, image_tag or 'latest')]]+project_filter) if search_links: repo_links = search_links else: @@ -287,7 +293,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) search_links = _get_docker_links( api_client, num_retries, filters=[['link_class', '=', 'docker_image_hash'], - ['name', 'ilike', image_name + '%']]) + ['name', 'ilike', image_name + '%']]+project_filter) hash_links = search_links # Only list information about images that were found in the search. search_filters.append(['head_uuid', 'in', @@ -299,7 +305,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) if hash_links is None: hash_links = _get_docker_links( api_client, num_retries, - filters=search_filters + [['link_class', '=', 'docker_image_hash']]) + filters=search_filters + [['link_class', '=', 'docker_image_hash']]+project_filter) hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)} # Each collection may have more than one name (though again, one name @@ -309,7 +315,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) repo_links = _get_docker_links( api_client, num_retries, filters=search_filters + [['link_class', '=', - 'docker_image_repo+tag']]) + 'docker_image_repo+tag']]+project_filter) seen_image_names = collections.defaultdict(set) images = [] for link in repo_links: @@ -321,7 +327,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) dockerhash = hash_link_map[collection_uuid]['name'] except KeyError: dockerhash = '' - name_parts = link['name'].split(':', 1) + name_parts = link['name'].rsplit(':', 1) images.append(_new_image_listing(link, dockerhash, *name_parts)) # Find any image hash links that did not have a corresponding name link, @@ -335,10 +341,12 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) images.sort(key=itemgetter('_sort_key'), reverse=True) # Remove any image listings that refer to unknown collections. - existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all( - api_client.collections().list, num_retries, - filters=[['uuid', 'in', [im['collection'] for im in images]]], - select=['uuid'])} + existing_coll_uuids = {coll['uuid'] for coll in arvados.util.keyset_list_all( + api_client.collections().list, + num_retries=num_retries, + filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter, + select=['uuid'], + )} return [(image['collection'], image) for image in images if image['collection'] in existing_coll_uuids] @@ -351,10 +359,29 @@ def _uuid2pdh(api, uuid): select=['portable_data_hash'], ).execute()['items'][0]['portable_data_hash'] +def load_image_metadata(image_file): + """Load an image manifest and config from an archive + + Given an image archive as an open binary file object, this function loads + the image manifest and configuration, deserializing each from JSON and + returning them in a 2-tuple of dicts. + """ + image_file.seek(0) + with tarfile.open(fileobj=image_file) as image_tar: + with image_tar.extractfile('manifest.json') as manifest_file: + image_manifest_list = json.load(manifest_file) + # Because arv-keepdocker only saves one image, there should only be + # one manifest. This extracts that from the list and raises + # ValueError if there's not exactly one. + image_manifest, = image_manifest_list + with image_tar.extractfile(image_manifest['Config']) as config_file: + image_config = json.load(config_file) + return image_manifest, image_config + def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None): args = arg_parser.parse_args(arguments) if api is None: - api = arvados.api('v1') + api = arvados.api('v1', num_retries=args.retries) if args.image is None or args.image == 'images': fmt = "{:30} {:10} {:12} {:29} {:20}\n" @@ -381,6 +408,16 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None) elif args.tag is None: args.tag = 'latest' + if '/' in args.image: + hostport, path = args.image.split('/', 1) + if hostport.endswith(':443'): + # "docker pull host:443/asdf" transparently removes the + # :443 (which is redundant because https is implied) and + # after it succeeds "docker images" will list "host/asdf", + # not "host:443/asdf". If we strip the :443 then the name + # doesn't change underneath us. + args.image = '/'.join([hostport[:-4], path]) + # Pull the image if requested, unless the image is specified as a hash # that we already have. if args.pull and not find_image_hashes(args.image): @@ -517,21 +554,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None) # Managed properties could be already set coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {}) coll_properties.update({"docker-image-repo-tag": image_repo_tag}) - api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries) - # Read the image metadata and make Arvados links from it. - image_file.seek(0) - image_tar = tarfile.open(fileobj=image_file) - image_hash_type, _, raw_image_hash = image_hash.rpartition(':') - if image_hash_type: - json_filename = raw_image_hash + '.json' - else: - json_filename = raw_image_hash + '/json' - json_file = image_tar.extractfile(image_tar.getmember(json_filename)) - image_metadata = json.loads(json_file.read().decode('utf-8')) - json_file.close() - image_tar.close() + _, image_metadata = load_image_metadata(image_file) link_base = {'head_uuid': coll_uuid, 'properties': {}} if 'created' in image_metadata: link_base['properties']['image_timestamp'] = image_metadata['created']