X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/386e1eefaac2021805f73732b10e9f543c221593..c62e8c7eb9f864f6d3a8328af83572f0f05958da:/sdk/cwl/arvados_cwl/arvdocker.py diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py index 6b736a5a7d..ae5a434074 100644 --- a/sdk/cwl/arvados_cwl/arvdocker.py +++ b/sdk/cwl/arvados_cwl/arvdocker.py @@ -6,6 +6,8 @@ import logging import sys import threading import copy +import re +import subprocess from schema_salad.sourceline import SourceLine @@ -15,36 +17,116 @@ import arvados.commands.keepdocker logger = logging.getLogger('arvados.cwl-runner') -cached_lookups = {} -cached_lookups_lock = threading.Lock() - -def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid): +def determine_image_id(dockerImageId): + for line in ( + str(subprocess.check_output( # nosec + ["docker", "images", "--no-trunc", "--all"] + ), "utf-8") + .splitlines() + ): + try: + match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line) + split = dockerImageId.split(":") + if len(split) == 1: + split.append("latest") + elif len(split) == 2: + # if split[1] doesn't match valid tag names, it is a part of repository + if not re.match(r"[\w][\w.-]{0,127}", split[1]): + split[0] = split[0] + ":" + split[1] + split[1] = "latest" + elif len(split) == 3: + if re.match(r"[\w][\w.-]{0,127}", split[2]): + split[0] = split[0] + ":" + split[1] + split[1] = split[2] + del split[2] + + # check for repository:tag match or image id match + if match and ( + (split[0] == match.group(1) and split[1] == match.group(2)) + or dockerImageId == match.group(3) + ): + return match.group(3) + except ValueError: + pass + + return None + + +def arv_docker_get_image(api_client, dockerRequirement, pull_image, runtimeContext): """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker.""" + project_uuid = runtimeContext.project_uuid + force_pull = runtimeContext.force_docker_pull + tmp_outdir_prefix = runtimeContext.tmp_outdir_prefix + match_local_docker = runtimeContext.match_local_docker + copy_deps = runtimeContext.copy_deps + cached_lookups = runtimeContext.cached_docker_lookups + + if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement: + return dockerRequirement["http://arvados.org/cwl#dockerCollectionPDH"] + if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement: dockerRequirement = copy.deepcopy(dockerRequirement) dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"] if hasattr(dockerRequirement, 'lc'): dockerRequirement.lc.data["dockerImageId"] = dockerRequirement.lc.data["dockerPull"] - global cached_lookups - global cached_lookups_lock - with cached_lookups_lock: - if dockerRequirement["dockerImageId"] in cached_lookups: - return dockerRequirement["dockerImageId"] + if dockerRequirement["dockerImageId"] in cached_lookups: + return cached_lookups[dockerRequirement["dockerImageId"]] - with SourceLine(dockerRequirement, "dockerImageId", WorkflowException): + with SourceLine(dockerRequirement, "dockerImageId", WorkflowException, logger.isEnabledFor(logging.DEBUG)): sp = dockerRequirement["dockerImageId"].split(":") image_name = sp[0] image_tag = sp[1] if len(sp) > 1 else "latest" - images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, + out_of_project_images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, image_name=image_name, - image_tag=image_tag) + image_tag=image_tag, + project_uuid=None) + + if copy_deps: + # Only images that are available in the destination project + images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, + image_name=image_name, + image_tag=image_tag, + project_uuid=project_uuid) + else: + images = out_of_project_images + + if match_local_docker: + local_image_id = determine_image_id(dockerRequirement["dockerImageId"]) + if local_image_id: + # find it in the list + found = False + for i in images: + if i[1]["dockerhash"] == local_image_id: + found = True + images = [i] + break + if not found: + # force re-upload. + images = [] + + for i in out_of_project_images: + if i[1]["dockerhash"] == local_image_id: + found = True + out_of_project_images = [i] + break + if not found: + # force re-upload. + out_of_project_images = [] if not images: - # Fetch Docker image if necessary. - cwltool.docker.get_image(dockerRequirement, pull_image) + if not out_of_project_images: + # Fetch Docker image if necessary. + try: + dockerjob = cwltool.docker.DockerCommandLineJob(None, None, None, None, None, None) + result = dockerjob.get_image(dockerRequirement, pull_image, + force_pull, tmp_outdir_prefix) + if not result: + raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"]) + except OSError as e: + raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e)) # Upload image to Arvados args = [] @@ -54,25 +136,23 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid args.append(image_tag) logger.info("Uploading Docker image %s:%s", image_name, image_tag) try: - arvados.commands.keepdocker.main(args, stdout=sys.stderr) + arvados.commands.put.api_client = api_client + arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client) except SystemExit as e: + # If e.code is None or zero, then keepdocker exited normally and we can continue if e.code: raise WorkflowException("keepdocker exited with code %s" % e.code) images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, image_name=image_name, - image_tag=image_tag) + image_tag=image_tag, + project_uuid=project_uuid) if not images: raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag)) - with cached_lookups_lock: - cached_lookups[dockerRequirement["dockerImageId"]] = True + pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"] - return dockerRequirement["dockerImageId"] + cached_lookups[dockerRequirement["dockerImageId"]] = pdh -def arv_docker_clear_cache(): - global cached_lookups - global cached_lookups_lock - with cached_lookups_lock: - cached_lookups = {} + return pdh