select=['portable_data_hash'],
).execute()['items'][0]['portable_data_hash']
+def load_image_metadata(image_file):
+ """Load an image manifest and config from an archive
+
+ Given an image archive as an open binary file object, this function loads
+ the image manifest and configuration, deserializing each from JSON and
+ returning them in a 2-tuple of dicts.
+ """
+ image_file.seek(0)
+ with tarfile.open(fileobj=image_file) as image_tar:
+ with image_tar.extractfile('manifest.json') as manifest_file:
+ image_manifest_list = json.load(manifest_file)
+ # Because arv-keepdocker only saves one image, there should only be
+ # one manifest. This extracts that from the list and raises
+ # ValueError if there's not exactly one.
+ image_manifest, = image_manifest_list
+ with image_tar.extractfile(image_manifest['Config']) as config_file:
+ image_config = json.load(config_file)
+ return image_manifest, image_config
+
def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
args = arg_parser.parse_args(arguments)
if api is None:
# Managed properties could be already set
coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
coll_properties.update({"docker-image-repo-tag": image_repo_tag})
-
api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
- # Read the image metadata and make Arvados links from it.
- image_file.seek(0)
- image_tar = tarfile.open(fileobj=image_file)
- image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
- if image_hash_type:
- json_filename = raw_image_hash + '.json'
- else:
- json_filename = raw_image_hash + '/json'
- json_file = image_tar.extractfile(image_tar.getmember(json_filename))
- image_metadata = json.loads(json_file.read().decode('utf-8'))
- json_file.close()
- image_tar.close()
+ _, image_metadata = load_image_metadata(image_file)
link_base = {'head_uuid': coll_uuid, 'properties': {}}
if 'created' in image_metadata:
link_base['properties']['image_timestamp'] = image_metadata['created']
--- /dev/null
+The hello-world-*.tar files are archived from the official Docker
+hello-world:latest image available on 2024-02-01,
+sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a.
+<https://github.com/docker-library/hello-world/tree/a2269bdb107d086851a5e3d448cf47770b50bff7>
+
+Copyright (c) 2014 Docker, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import arvados
import collections
+import collections.abc
import copy
import hashlib
import logging
import sys
import tempfile
import unittest
+from pathlib import Path
+
+import parameterized
import arvados.commands.keepdocker as arv_keepdocker
from . import arvados_testutil as tutil
api().collections().update.assert_called_with(
uuid=mocked_collection['uuid'],
body={'properties': updated_properties})
+
+
+@parameterized.parameterized_class(('filename',), [
+ ('hello-world-ManifestV2.tar',),
+ ('hello-world-ManifestV2-OCILayout.tar',),
+])
+class ImageMetadataTestCase(unittest.TestCase):
+ DATA_PATH = Path(__file__).parent / 'data'
+
+ @classmethod
+ def setUpClass(cls):
+ cls.image_file = (cls.DATA_PATH / cls.filename).open('rb')
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.image_file.close()
+
+ def setUp(self):
+ self.manifest, self.config = arv_keepdocker.load_image_metadata(self.image_file)
+
+ def test_image_manifest(self):
+ self.assertIsInstance(self.manifest, collections.abc.Mapping)
+ self.assertEqual(self.manifest.get('RepoTags'), ['hello-world:latest'])
+
+ def test_image_config(self):
+ self.assertIsInstance(self.config, collections.abc.Mapping)
+ self.assertEqual(self.config.get('created'), '2023-05-02T16:49:27Z')