X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9b0de8314d6297df506734844bb2b833ba7d022c..HEAD:/sdk/python/arvados/commands/keepdocker.py diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py index 2d5c0150c9..188f8be457 100644 --- a/sdk/python/arvados/commands/keepdocker.py +++ b/sdk/python/arvados/commands/keepdocker.py @@ -2,39 +2,36 @@ # # SPDX-License-Identifier: Apache-2.0 -from builtins import next import argparse import collections import datetime import errno +import fcntl import json +import logging import os import re +import subprocess import sys import tarfile import tempfile -import shutil -import _strptime -import fcntl + +import ciso8601 from operator import itemgetter +from pathlib import Path from stat import * -if os.name == "posix" and sys.version_info[0] < 3: - import subprocess32 as subprocess -else: - import subprocess - import arvados +import arvados.config import arvados.util import arvados.commands._util as arv_cmd import arvados.commands.put as arv_put -from arvados.collection import CollectionReader -import ciso8601 -import logging -import arvados.config - from arvados._version import __version__ +from typing import ( + Callable, +) + logger = logging.getLogger('arvados.keepdocker') logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG') else logging.INFO) @@ -189,9 +186,12 @@ def save_image(image_hash, image_file): except STAT_CACHE_ERRORS: pass # We won't resume from this cache. No big deal. -def get_cache_dir(): - return arv_cmd.make_home_conf_dir( - os.path.join('.cache', 'arvados', 'docker'), 0o700) +def get_cache_dir( + mkparent: Callable[[], Path]=arvados.util._BaseDirectories('CACHE').storage_path, +) -> str: + path = mkparent() / 'docker' + path.mkdir(mode=0o700, exist_ok=True) + return str(path) def prep_image_file(filename): # Return a file object ready to save a Docker image, @@ -240,8 +240,9 @@ def docker_link_sort_key(link): return (image_timestamp, created_timestamp) def _get_docker_links(api_client, num_retries, **kwargs): - links = arvados.util.list_all(api_client.links().list, - num_retries, **kwargs) + links = list(arvados.util.keyset_list_all( + api_client.links().list, num_retries=num_retries, **kwargs, + )) for link in links: link['_sort_key'] = docker_link_sort_key(link) links.sort(key=itemgetter('_sort_key'), reverse=True) @@ -340,10 +341,12 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, images.sort(key=itemgetter('_sort_key'), reverse=True) # Remove any image listings that refer to unknown collections. - existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all( - api_client.collections().list, num_retries, - filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter, - select=['uuid'])} + existing_coll_uuids = {coll['uuid'] for coll in arvados.util.keyset_list_all( + api_client.collections().list, + num_retries=num_retries, + filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter, + select=['uuid'], + )} return [(image['collection'], image) for image in images if image['collection'] in existing_coll_uuids] @@ -356,10 +359,29 @@ def _uuid2pdh(api, uuid): select=['portable_data_hash'], ).execute()['items'][0]['portable_data_hash'] +def load_image_metadata(image_file): + """Load an image manifest and config from an archive + + Given an image archive as an open binary file object, this function loads + the image manifest and configuration, deserializing each from JSON and + returning them in a 2-tuple of dicts. + """ + image_file.seek(0) + with tarfile.open(fileobj=image_file) as image_tar: + with image_tar.extractfile('manifest.json') as manifest_file: + image_manifest_list = json.load(manifest_file) + # Because arv-keepdocker only saves one image, there should only be + # one manifest. This extracts that from the list and raises + # ValueError if there's not exactly one. + image_manifest, = image_manifest_list + with image_tar.extractfile(image_manifest['Config']) as config_file: + image_config = json.load(config_file) + return image_manifest, image_config + def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None): args = arg_parser.parse_args(arguments) if api is None: - api = arvados.api('v1') + api = arvados.api('v1', num_retries=args.retries) if args.image is None or args.image == 'images': fmt = "{:30} {:10} {:12} {:29} {:20}\n" @@ -532,21 +554,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None) # Managed properties could be already set coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {}) coll_properties.update({"docker-image-repo-tag": image_repo_tag}) - api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries) - # Read the image metadata and make Arvados links from it. - image_file.seek(0) - image_tar = tarfile.open(fileobj=image_file) - image_hash_type, _, raw_image_hash = image_hash.rpartition(':') - if image_hash_type: - json_filename = raw_image_hash + '.json' - else: - json_filename = raw_image_hash + '/json' - json_file = image_tar.extractfile(image_tar.getmember(json_filename)) - image_metadata = json.loads(json_file.read().decode('utf-8')) - json_file.close() - image_tar.close() + _, image_metadata = load_image_metadata(image_file) link_base = {'head_uuid': coll_uuid, 'properties': {}} if 'created' in image_metadata: link_base['properties']['image_timestamp'] = image_metadata['created']