21417: Update arv-keepdocker for OCI-layout archives 21417-keepdocker-oci-layout
authorBrett Smith <brett.smith@curii.com>
Fri, 2 Feb 2024 07:26:26 +0000 (02:26 -0500)
committerBrett Smith <brett.smith@curii.com>
Fri, 2 Feb 2024 07:26:26 +0000 (02:26 -0500)
Docker started saving image archives with this layout in version 25. Use
a more correct implementation that reads manifest.json as the archive
entry point and supports both layouts without branching.

Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith@curii.com>

sdk/python/arvados/commands/keepdocker.py
sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar [new file with mode: 0644]
sdk/python/tests/data/hello-world-ManifestV2.tar [new file with mode: 0644]
sdk/python/tests/data/hello-world-README.txt [new file with mode: 0644]
sdk/python/tests/test_arv_keepdocker.py

index a8b1dab5a8386d4cf830e384781f5d62ea3b929f..6823ee1beada080526c9a9aa901d752e7b5aefd9 100644 (file)
@@ -351,6 +351,25 @@ def _uuid2pdh(api, uuid):
         select=['portable_data_hash'],
     ).execute()['items'][0]['portable_data_hash']
 
+def load_image_metadata(image_file):
+    """Load an image manifest and config from an archive
+
+    Given an image archive as an open binary file object, this function loads
+    the image manifest and configuration, deserializing each from JSON and
+    returning them in a 2-tuple of dicts.
+    """
+    image_file.seek(0)
+    with tarfile.open(fileobj=image_file) as image_tar:
+        with image_tar.extractfile('manifest.json') as manifest_file:
+            image_manifest_list = json.load(manifest_file)
+        # Because arv-keepdocker only saves one image, there should only be
+        # one manifest.  This extracts that from the list and raises
+        # ValueError if there's not exactly one.
+        image_manifest, = image_manifest_list
+        with image_tar.extractfile(image_manifest['Config']) as config_file:
+            image_config = json.load(config_file)
+    return image_manifest, image_config
+
 def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
     args = arg_parser.parse_args(arguments)
     if api is None:
@@ -527,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
         # Managed properties could be already set
         coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
         coll_properties.update({"docker-image-repo-tag": image_repo_tag})
-
         api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
 
-        # Read the image metadata and make Arvados links from it.
-        image_file.seek(0)
-        image_tar = tarfile.open(fileobj=image_file)
-        image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
-        if image_hash_type:
-            json_filename = raw_image_hash + '.json'
-        else:
-            json_filename = raw_image_hash + '/json'
-        json_file = image_tar.extractfile(image_tar.getmember(json_filename))
-        image_metadata = json.loads(json_file.read().decode('utf-8'))
-        json_file.close()
-        image_tar.close()
+        _, image_metadata = load_image_metadata(image_file)
         link_base = {'head_uuid': coll_uuid, 'properties': {}}
         if 'created' in image_metadata:
             link_base['properties']['image_timestamp'] = image_metadata['created']
diff --git a/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar b/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar
new file mode 100644 (file)
index 0000000..a4b3d86
Binary files /dev/null and b/sdk/python/tests/data/hello-world-ManifestV2-OCILayout.tar differ
diff --git a/sdk/python/tests/data/hello-world-ManifestV2.tar b/sdk/python/tests/data/hello-world-ManifestV2.tar
new file mode 100644 (file)
index 0000000..b98e7c7
Binary files /dev/null and b/sdk/python/tests/data/hello-world-ManifestV2.tar differ
diff --git a/sdk/python/tests/data/hello-world-README.txt b/sdk/python/tests/data/hello-world-README.txt
new file mode 100644 (file)
index 0000000..8c6a7de
--- /dev/null
@@ -0,0 +1,25 @@
+The hello-world-*.tar files are archived from the official Docker
+hello-world:latest image available on 2024-02-01,
+sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a.
+<https://github.com/docker-library/hello-world/tree/a2269bdb107d086851a5e3d448cf47770b50bff7>
+
+Copyright (c) 2014 Docker, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
index 50a92b3e6025df1eb469654cd4a177be6e1d1d88..9aebc0350424e0b4051d14687cf7c4376135c18c 100644 (file)
@@ -4,6 +4,7 @@
 
 import arvados
 import collections
+import collections.abc
 import copy
 import hashlib
 import logging
@@ -13,6 +14,9 @@ import subprocess
 import sys
 import tempfile
 import unittest
+from pathlib import Path
+
+import parameterized
 
 import arvados.commands.keepdocker as arv_keepdocker
 from . import arvados_testutil as tutil
@@ -223,3 +227,30 @@ class ArvKeepdockerTestCase(unittest.TestCase, tutil.VersionChecker):
         api().collections().update.assert_called_with(
             uuid=mocked_collection['uuid'],
             body={'properties': updated_properties})
+
+
+@parameterized.parameterized_class(('filename',), [
+    ('hello-world-ManifestV2.tar',),
+    ('hello-world-ManifestV2-OCILayout.tar',),
+])
+class ImageMetadataTestCase(unittest.TestCase):
+    DATA_PATH = Path(__file__).parent / 'data'
+
+    @classmethod
+    def setUpClass(cls):
+        cls.image_file = (cls.DATA_PATH / cls.filename).open('rb')
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.image_file.close()
+
+    def setUp(self):
+        self.manifest, self.config = arv_keepdocker.load_image_metadata(self.image_file)
+
+    def test_image_manifest(self):
+        self.assertIsInstance(self.manifest, collections.abc.Mapping)
+        self.assertEqual(self.manifest.get('RepoTags'), ['hello-world:latest'])
+
+    def test_image_config(self):
+        self.assertIsInstance(self.config, collections.abc.Mapping)
+        self.assertEqual(self.config.get('created'), '2023-05-02T16:49:27Z')