import sys
import threading
import copy
+import re
+import subprocess
from schema_salad.sourceline import SourceLine
# Module-level cache of Docker image lookups, keyed by the requirement's
# "dockerImageId" string.  arv_docker_get_image consults it before doing any
# work and stores its result in it afterwards.
cached_lookups = {}
# Held around every read and write of cached_lookups in arv_docker_get_image.
cached_lookups_lock = threading.Lock()
-def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid):
def _split_repository_and_tag(image_ref):
    """Split a Docker image reference into ``(repository, tag)``.

    Only the text after the *last* colon is considered as a tag, and only
    when the whole of it is a valid tag name (a word character followed by up
    to 127 word characters, dots or dashes).  Otherwise the colon belongs to
    the repository (for example a registry port such as
    ``registry:5000/repo``) and the tag defaults to ``"latest"``.
    """
    repository, colon, tag = image_ref.rpartition(":")
    # fullmatch, not match: "5000/repo" must not pass as a tag merely because
    # its prefix "5000" looks like one.
    if colon and re.fullmatch(r"[\w][\w.-]{0,127}", tag):
        return repository, tag
    return image_ref, "latest"


def determine_image_id(dockerImageId, image_listing=None):
    """Return the local Docker image id matching *dockerImageId*, or None.

    Each listing line is matched on its first three whitespace-separated
    fields, which are used as repository, tag and image id.  A line matches
    when its repository and tag equal those parsed from *dockerImageId*, or
    when its image id equals *dockerImageId* verbatim.

    Args:
        dockerImageId: Image reference (``repo``, ``repo:tag``,
            ``registry:port/repo[:tag]``) or a full image id.
        image_listing: Optional iterable of text lines in the format printed
            by ``docker images --no-trunc --all``.  When None (the default),
            that command is executed to obtain the listing.

    Returns:
        The image id field of the first matching line, or None if no line
        matches.

    Raises:
        OSError / subprocess.CalledProcessError: if ``docker`` has to be run
            and cannot be executed or exits with a non-zero status.
    """
    if image_listing is None:
        image_listing = (
            subprocess.check_output(  # nosec
                ["docker", "images", "--no-trunc", "--all"]
            )
            .decode("utf-8")
            .splitlines()
        )

    # Both the parsed reference and the row pattern are the same for every
    # line, so compute them once up front.
    repository, tag = _split_repository_and_tag(dockerImageId)
    row_pattern = re.compile(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)")

    for line in image_listing:
        match = row_pattern.match(line)
        if not match:
            continue
        row_repository, row_tag, row_image_id = match.groups()
        # check for repository:tag match or image id match
        if (
            repository == row_repository and tag == row_tag
        ) or dockerImageId == row_image_id:
            return row_image_id

    return None
+
+
+def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
+ force_pull, tmp_outdir_prefix, match_local_docker):
"""Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
# NOTE(review): this function is shown in unified-diff form.  Lines marked
# '-' are the removed variants, lines marked '+' the added ones, and the
# unchanged code between hunks is not visible here.
#
# If the requirement already names a collection portable data hash, return
# it directly; none of the lookup or upload steps below run.
+ if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
+ return dockerRequirement["http://arvados.org/cwl#dockerCollectionPDH"]
+
# When only "dockerPull" is given, use its value as "dockerImageId".  The
# assignment is made on a deep copy, not on the dict that was passed in.
if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
dockerRequirement = copy.deepcopy(dockerRequirement)
dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
global cached_lookups_lock
# Cache hit: the added line returns the value stored for this image id
# (the removed line returned the image id itself).
with cached_lookups_lock:
if dockerRequirement["dockerImageId"] in cached_lookups:
- return dockerRequirement["dockerImageId"]
+ return cached_lookups[dockerRequirement["dockerImageId"]]
# NOTE(review): SourceLine presumably attaches source position details to a
# WorkflowException raised inside the block -- confirm against schema_salad.
with SourceLine(dockerRequirement, "dockerImageId", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
# Split the image id on ":"; the code that consumes `sp` is not visible here.
sp = dockerRequirement["dockerImageId"].split(":")
# Tail of a call whose beginning lies outside this view; presumably the call
# that produces `images` -- TODO confirm.
image_name=image_name,
image_tag=image_tag)
# With match_local_docker set and candidates found, look up the local Docker
# image id.  If one exists, keep only the candidate whose "dockerhash"
# equals it; if no candidate matches, empty `images` so the fetch/upload
# path below runs.
+ if images and match_local_docker:
+ local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+ if local_image_id:
+ # find it in the list
+ found = False
+ for i in images:
+ if i[1]["dockerhash"] == local_image_id:
+ found = True
+ images = [i]
+ break
+ if not found:
+ # force re-upload.
+ images = []
+
if not images:
# Fetch Docker image if necessary.
try:
- cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image)
# The added call also passes force_pull and tmp_outdir_prefix, and a falsy
# result is turned into a WorkflowException.
+ result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image,
+ force_pull, tmp_outdir_prefix)
+ if not result:
+ raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"])
except OSError as e:
raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))
# `args` is built in code that is not visible in this view.
args.append(image_tag)
logger.info("Uploading Docker image %s:%s", image_name, image_tag)
try:
- arvados.commands.keepdocker.main(args, stdout=sys.stderr)
# The caller's api_client is assigned to arvados.commands.put.api_client and
# also passed to keepdocker.main as api=, together with
# install_sig_handlers=False.
+ arvados.commands.put.api_client = api_client
+ arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
except SystemExit as e:
+ # If e.code is None or zero, then keepdocker exited normally and we can continue
if e.code:
raise WorkflowException("keepdocker exited with code %s" % e.code)
if not images:
raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))
# images[0][0] is used as the collection uuid; fetch that collection and
# take its "portable_data_hash".
+ pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"]
+
# Store the result under the image id and return it.  The added lines cache
# and return the PDH; the removed lines cached True and returned the image id.
with cached_lookups_lock:
- cached_lookups[dockerRequirement["dockerImageId"]] = True
+ cached_lookups[dockerRequirement["dockerImageId"]] = pdh
- return dockerRequirement["dockerImageId"]
+ return pdh
def arv_docker_clear_cache():
global cached_lookups