18947: Merge branch 'main'
[arvados.git] / sdk / python / arvados / commands / keepdocker.py
index c89fa644ccf295e7d62bb16ed594c0951fb90e0d..db4edd2dfa6f1e089979c56bbb6751afd84b2c3c 100644 (file)
@@ -85,11 +85,12 @@ class DockerError(Exception):
 def popen_docker(cmd, *args, **kwargs):
     manage_stdin = ('stdin' not in kwargs)
     kwargs.setdefault('stdin', subprocess.PIPE)
-    kwargs.setdefault('stdout', sys.stderr)
+    kwargs.setdefault('stdout', subprocess.PIPE)
+    kwargs.setdefault('stderr', subprocess.PIPE)
     try:
-        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
-    except OSError:  # No docker.io in $PATH
         docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
+    except OSError:  # No docker in $PATH, try docker.io
+        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
     if manage_stdin:
         docker_proc.stdin.close()
     return docker_proc
@@ -146,20 +147,18 @@ def docker_images():
     check_docker(list_proc, "images")
 
 def find_image_hashes(image_search, image_tag=None):
-    # Given one argument, search for Docker images with matching hashes,
-    # and return their full hashes in a set.
-    # Given two arguments, also search for a Docker image with the
-    # same repository and tag.  If one is found, return its hash in a
-    # set; otherwise, fall back to the one-argument hash search.
-    # Returns None if no match is found, or a hash search is ambiguous.
-    hash_search = image_search.lower()
-    hash_matches = set()
-    for image in docker_images():
-        if (image.repo == image_search) and (image.tag == image_tag):
-            return set([image.hash])
-        elif image.hash.startswith(hash_search):
-            hash_matches.add(image.hash)
-    return hash_matches
+    # Query Docker for images matching the repository and tag, and
+    # return their image ids in a list.  Returns an empty list if no
+    # match is found.
+
+    list_proc = popen_docker(['inspect', "%s%s" % (image_search, ":"+image_tag if image_tag else "")], stdout=subprocess.PIPE)
+
+    inspect = list_proc.stdout.read()
+    list_proc.stdout.close()
+
+    imageinfo = json.loads(inspect)
+
+    return [i["Id"] for i in imageinfo]
 
 def find_one_image_hash(image_search, image_tag=None):
     hashes = find_image_hashes(image_search, image_tag)
@@ -259,7 +258,7 @@ def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
         'tag': tag,
         }
 
-def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
+def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, project_uuid=None):
     """List all Docker images known to the api_client with image_name and
     image_tag.  If no image_name is given, defaults to listing all
     Docker images.
@@ -274,13 +273,18 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     search_filters = []
     repo_links = None
     hash_links = None
+
+    project_filter = []
+    if project_uuid is not None:
+        project_filter = [["owner_uuid", "=", project_uuid]]
+
     if image_name:
         # Find images with the name the user specified.
         search_links = _get_docker_links(
             api_client, num_retries,
             filters=[['link_class', '=', 'docker_image_repo+tag'],
                      ['name', '=',
-                      '{}:{}'.format(image_name, image_tag or 'latest')]])
+                      '{}:{}'.format(image_name, image_tag or 'latest')]]+project_filter)
         if search_links:
             repo_links = search_links
         else:
@@ -288,7 +292,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
             search_links = _get_docker_links(
                 api_client, num_retries,
                 filters=[['link_class', '=', 'docker_image_hash'],
-                         ['name', 'ilike', image_name + '%']])
+                         ['name', 'ilike', image_name + '%']]+project_filter)
             hash_links = search_links
         # Only list information about images that were found in the search.
         search_filters.append(['head_uuid', 'in',
@@ -300,7 +304,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     if hash_links is None:
         hash_links = _get_docker_links(
             api_client, num_retries,
-            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
+            filters=search_filters + [['link_class', '=', 'docker_image_hash']]+project_filter)
     hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}
 
     # Each collection may have more than one name (though again, one name
@@ -310,7 +314,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
         repo_links = _get_docker_links(
             api_client, num_retries,
             filters=search_filters + [['link_class', '=',
-                                       'docker_image_repo+tag']])
+                                       'docker_image_repo+tag']]+project_filter)
     seen_image_names = collections.defaultdict(set)
     images = []
     for link in repo_links:
@@ -338,7 +342,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     # Remove any image listings that refer to unknown collections.
     existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
             api_client.collections().list, num_retries,
-            filters=[['uuid', 'in', [im['collection'] for im in images]]],
+            filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter,
             select=['uuid'])}
     return [(image['collection'], image) for image in images
             if image['collection'] in existing_coll_uuids]
@@ -387,18 +391,25 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
     if args.pull and not find_image_hashes(args.image):
         pull_image(args.image, args.tag)
 
+    images_in_arv = list_images_in_arv(api, args.retries, args.image, args.tag)
+
+    image_hash = None
     try:
         image_hash = find_one_image_hash(args.image, args.tag)
+        if not docker_image_compatible(api, image_hash):
+            if args.force_image_format:
+                logger.warning("forcing incompatible image")
+            else:
+                logger.error("refusing to store " \
+                    "incompatible format (use --force-image-format to override)")
+                sys.exit(1)
     except DockerError as error:
-        logger.error(error.message)
-        sys.exit(1)
-
-    if not docker_image_compatible(api, image_hash):
-        if args.force_image_format:
-            logger.warning("forcing incompatible image")
+        if images_in_arv:
+            # We don't have Docker, or we don't have the image locally,
+            # so use the image that's already uploaded to Arvados.
+            image_hash = images_in_arv[0][1]['dockerhash']
         else:
-            logger.error("refusing to store " \
-                "incompatible format (use --force-image-format to override)")
+            logger.error(str(error))
             sys.exit(1)
 
     image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None
@@ -498,6 +509,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
         arguments = [i for i in arguments if i not in (args.image, args.tag, image_repo_tag)]
         put_args = keepdocker_parser.parse_known_args(arguments)[1]
 
+        # Don't fail when cached manifest is invalid, just ignore the cache.
+        put_args += ['--batch']
+
         if args.name is None:
             put_args += ['--name', collection_name]
 
@@ -505,7 +519,11 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
             put_args + ['--filename', outfile_name, image_file.name], stdout=stdout,
             install_sig_handlers=install_sig_handlers).strip()
 
-        api.collections().update(uuid=coll_uuid, body={"properties": {"docker-image-repo-tag": image_repo_tag}}).execute(num_retries=args.retries)
+        # Managed properties may already be set, so merge instead of overwriting.
+        coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
+        coll_properties.update({"docker-image-repo-tag": image_repo_tag})
+
+        api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
 
         # Read the image metadata and make Arvados links from it.
         image_file.seek(0)