Merge branch '18994-cwl-basename' refs #18994
[arvados.git] / sdk / cwl / arvados_cwl / arvdocker.py
index 3c820827132e378ca88efe4eca9e76ea7df468ef..04e2a4cffcfb5e674aa471555aa5e0c7fac2033e 100644 (file)
@@ -6,6 +6,8 @@ import logging
 import sys
 import threading
 import copy
+import re
+import subprocess
 
 from schema_salad.sourceline import SourceLine
 
@@ -18,8 +20,44 @@ logger = logging.getLogger('arvados.cwl-runner')
 cached_lookups = {}
 cached_lookups_lock = threading.Lock()
 
+def determine_image_id(dockerImageId):
+    for line in (
+        subprocess.check_output(  # nosec
+            ["docker", "images", "--no-trunc", "--all"]
+        )
+        .decode("utf-8")
+        .splitlines()
+    ):
+        try:
+            match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
+            split = dockerImageId.split(":")
+            if len(split) == 1:
+                split.append("latest")
+            elif len(split) == 2:
+                #  if split[1] doesn't  match valid tag names, it is a part of repository
+                if not re.match(r"[\w][\w.-]{0,127}", split[1]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = "latest"
+            elif len(split) == 3:
+                if re.match(r"[\w][\w.-]{0,127}", split[2]):
+                    split[0] = split[0] + ":" + split[1]
+                    split[1] = split[2]
+                    del split[2]
+
+            # check for repository:tag match or image id match
+            if match and (
+                (split[0] == match.group(1) and split[1] == match.group(2))
+                or dockerImageId == match.group(3)
+            ):
+                return match.group(3)
+        except ValueError:
+            pass
+
+    return None
+
+
 def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
-                         force_pull, tmp_outdir_prefix):
+                         force_pull, tmp_outdir_prefix, match_local_docker):
     """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
 
     if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
@@ -46,11 +84,27 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                                                                 image_name=image_name,
                                                                 image_tag=image_tag)
 
+        if images and match_local_docker:
+            local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+            if local_image_id:
+                # find it in the list
+                found = False
+                for i in images:
+                    if i[1]["dockerhash"] == local_image_id:
+                        found = True
+                        images = [i]
+                        break
+                if not found:
+                    # force re-upload.
+                    images = []
+
         if not images:
             # Fetch Docker image if necessary.
             try:
-                cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image,
+                result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image,
                                                               force_pull, tmp_outdir_prefix)
+                if not result:
+                    raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"])
             except OSError as e:
                 raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))