From 8eeb2671b0a9815345218da335731a31230ada13 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 8 Sep 2014 16:54:49 -0400 Subject: [PATCH] 3453: Refactored arv-put to remove support for name links, correctly use ensure_name_unique to prevent name collisions. arv-keepdocker should now correctly handle cases where the user provides a image hash instead of repository/tag. Fixed tests. --- sdk/python/arvados/commands/keepdocker.py | 34 +++--- sdk/python/arvados/commands/put.py | 124 ++++++++++------------ sdk/python/arvados/util.py | 7 ++ sdk/python/tests/test_arv_put.py | 91 +++------------- 4 files changed, 95 insertions(+), 161 deletions(-) diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py index df13a948bd..c96e2cbd60 100644 --- a/sdk/python/arvados/commands/keepdocker.py +++ b/sdk/python/arvados/commands/keepdocker.py @@ -9,7 +9,6 @@ import subprocess import sys import tarfile import tempfile -import textwrap from collections import namedtuple from stat import * @@ -193,7 +192,7 @@ def list_images_in_arv(): fmt = "{:30} {:10} {:12} {:29} {:20}" print fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED") for i, j in st: - print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:11], i, j["timestamp"].strftime("%c"))) + print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c"))) def main(arguments=None): args = arg_parser.parse_args(arguments) @@ -213,10 +212,13 @@ def main(arguments=None): print >>sys.stderr, "arv-keepdocker:", error.message sys.exit(1) - image_repo_tag = '{}:{}'.format(args.image, args.tag) + image_repo_tag = '{}:{}'.format(args.image, args.tag) if not image_hash.startswith(args.image.lower()) else None if args.name is None: - collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:11]) + if image_repo_tag: + collection_name = 'Docker image {} {}'.format(image_repo_tag, image_hash[0:12]) + else: + collection_name = 'Docker image {}'.format(image_hash[0:12]) else: collection_name = args.name @@ -235,45 +237,45 @@ def main(arguments=None): if existing_links: # get readable collections collections = api.collections().list( - filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]], + filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]], select=["uuid", "owner_uuid", "name", "manifest_text"]).execute()['items'] if collections: # check for repo+tag links on these collections - existing_repo_tag = api.links().list( + existing_repo_tag = (api.links().list( filters=[['link_class', '=', 'docker_image_repo+tag'], ['name', '=', image_repo_tag], - ['head_uuid', 'in', collections]]).execute()['items'] + ['head_uuid', 'in', collections]]).execute()['items']) if image_repo_tag else [] # Filter on elements owned by the parent project owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid] - owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid] - owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid] + owned_img = [c for c in existing_links if c['owner_uuid'] == parent_project_uuid] + owned_rep = [c for c in existing_repo_tag if c['owner_uuid'] == parent_project_uuid] if owned_col: # already have a collection owned by this project coll_uuid = owned_col[0]['uuid'] else: # create new collection owned by the project - coll_uuid = api.collections().create(body={"manifest_text": collections[0]['manifest_text'], - "name": collection_name, - "owner_uuid": parent_project_uuid}, + coll_uuid = api.collections().create(body={"manifest_text": collections[0]['manifest_text'], + "name": collection_name, + "owner_uuid": parent_project_uuid}, ensure_unique_name=True).execute()['uuid'] - link_base = {'owner_uuid': parent_project_uuid, + link_base = {'owner_uuid': parent_project_uuid, 'head_uuid': coll_uuid } if not owned_img: # create image link owned by the project make_link('docker_image_hash', image_hash, **link_base) - if not owned_rep: + if not owned_rep and image_repo_tag: # create repo+tag link owned by the project make_link('docker_image_repo+tag', image_repo_tag, **link_base) print(coll_uuid) - sys.exit(0) + sys.exit(0) # Open a file for the saved image, and write it if needed. outfile_name = '{}.tar'.format(image_hash) @@ -305,7 +307,7 @@ def main(arguments=None): link_base['owner_uuid'] = args.project_uuid make_link('docker_image_hash', image_hash, **link_base) - if not image_hash.startswith(args.image.lower()): + if image_repo_tag: make_link('docker_image_repo+tag', image_repo_tag, **link_base) diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index ed405dc1d9..0645cb49a7 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -348,44 +348,18 @@ def progress_writer(progress_func, outfile=sys.stderr): def exit_signal_handler(sigcode, frame): sys.exit(-sigcode) -def check_project_exists(project_uuid): - try: - api_client.groups().get(uuid=project_uuid).execute() - except (apiclient.errors.Error, arvados.errors.NotFoundError) as error: - raise ValueError("Project {} not found ({})".format(project_uuid, - error)) +def check_project_exists(api_client, project_uuid): + if project_uuid: + if arvados.util.user_uuid_pattern.match(project_uuid): + api_client.users().get(uuid=project_uuid).execute() + return project_uuid + elif arvados.util.group_uuid_pattern.match(project_uuid): + api_client.groups().get(uuid=project_uuid).execute() + return project_uuid + else: + raise Exception("Not a valid project uuid: {}".format(project_uuid)) else: - return True - -def prep_project_link(args, stderr, project_exists=check_project_exists): - # Given the user's command line arguments, return a dictionary with data - # to create the desired project link for this Collection, or None. - # Raises ValueError if the arguments request something impossible. - making_collection = not (args.raw or args.stream) - if not making_collection: - if args.name or args.project_uuid: - raise ValueError("Requested a Link without creating a Collection") - return None - link = {'tail_uuid': args.project_uuid, - 'link_class': 'name', - 'name': args.name} - if not link['tail_uuid']: - link['tail_uuid'] = api_client.users().current().execute()['uuid'] - elif not project_exists(link['tail_uuid']): - raise ValueError("Project {} not found".format(args.project_uuid)) - if not link['name']: - link['name'] = "Saved at {} by {}@{}".format( - datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"), - pwd.getpwuid(os.getuid()).pw_name, - socket.gethostname()) - stderr.write( - "arv-put: No --name specified. Saving as \"%s\"\n" % link['name']) - link['owner_uuid'] = link['tail_uuid'] - return link - -def create_project_link(locator, link): - link['head_uuid'] = locator - return api_client.links().create(body=link).execute() + return api_client.users().current().execute()['uuid'] def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): global api_client @@ -394,11 +368,29 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): status = 0 args = parse_arguments(arguments) + + # Determine the name to use + if args.name: + if args.stream or args.raw: + print >>stderr, "Cannot use --name with --stream or --raw" + sys.exit(1) + collection_name = args.name + else: + collection_name = "Saved at {} by {}@{}".format( + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"), + pwd.getpwuid(os.getuid()).pw_name, + socket.gethostname()) + + if args.project_uuid and (args.stream or args.raw): + print >>stderr, "Cannot use --project-uuid with --stream or --raw" + sys.exit(1) + + # Determine the parent project try: - project_link = prep_project_link(args, stderr) - except ValueError as error: - print >>stderr, "arv-put: {}.".format(error) - sys.exit(2) + project_uuid = check_project_exists(api_client, args.project_uuid) + except Exception as error: + print >>stderr, "Project {} not found: {}".format(args.project_uuid, error) + sys.exit(1) if args.progress: reporter = progress_writer(human_progress) @@ -455,33 +447,29 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): elif args.raw: output = ','.join(writer.data_locators()) else: - # Register the resulting collection in Arvados. - collection = api_client.collections().create( - body={ - 'manifest_text': writer.manifest_text(), - 'owner_uuid': project_link['tail_uuid'] - }, - ensure_unique_name=True - ).execute() - - if args.portable_data_hash and 'portable_data_hash' in collection and collection['portable_data_hash']: - output = collection['portable_data_hash'] - else: - output = collection['uuid'] - - if project_link is not None: - # Update collection name - try: - if 'name' in collection: - arvados.api().collections().update(uuid=collection['uuid'], - body={"name": project_link["name"]}).execute() - else: - create_project_link(output, project_link) - except apiclient.errors.Error as error: - print >>stderr, ( - "arv-put: Error adding Collection to project: {}.".format( - error)) - status = 1 + try: + # Register the resulting collection in Arvados. + collection = api_client.collections().create( + body={ + 'owner_uuid': project_uuid, + 'name': collection_name, + 'manifest_text': writer.manifest_text() + }, + ensure_unique_name=True + ).execute() + + print >>stderr, "Collection saved as '%s'" % collection['name'] + + if args.portable_data_hash and 'portable_data_hash' in collection and collection['portable_data_hash']: + output = collection['portable_data_hash'] + else: + output = collection['uuid'] + + except apiclient.errors.Error as error: + print >>stderr, ( + "arv-put: Error adding Collection to project: {}.".format( + error)) + status = 1 # Print the locator (uuid) of the new collection. stdout.write(output) diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py index 22a7427ebb..47d1d5cd94 100644 --- a/sdk/python/arvados/util.py +++ b/sdk/python/arvados/util.py @@ -9,6 +9,13 @@ from arvados.collection import * HEX_RE = re.compile(r'^[0-9a-fA-F]+$') +portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+\d+') +uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}') +collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}') +group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}') +user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}') +link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}') + def clear_tmpdir(path=None): """ Ensure the given directory (or TASK_TMPDIR if none given) diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index 0ce51af8b5..3b0ee13519 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -326,82 +326,9 @@ class ArvadosPutReportTest(ArvadosBaseTestCase): arv_put.human_progress(count, None))) -class ArvadosPutProjectLinkTest(ArvadosBaseTestCase): - Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' - - def setUp(self): - self.stderr = StringIO() - super(ArvadosPutProjectLinkTest, self).setUp() - - def tearDown(self): - self.stderr.close() - super(ArvadosPutProjectLinkTest, self).tearDown() - - def prep_link_from_arguments(self, args, uuid_found=True): - try: - link = arv_put.prep_project_link(arv_put.parse_arguments(args), - self.stderr, - lambda uuid: uuid_found) - finally: - self.stderr.seek(0) - return link - - def check_link(self, link, project_uuid, link_name=None): - self.assertEqual(project_uuid, link.get('tail_uuid')) - self.assertEqual(project_uuid, link.get('owner_uuid')) - self.assertEqual('name', link.get('link_class')) - if link_name is None: - self.assertNotIn('name', link) - else: - self.assertEqual(link_name, link.get('name')) - self.assertNotIn('head_uuid', link) - - def check_stderr_empty(self): - self.assertEqual('', self.stderr.getvalue()) - - def test_project_link_with_name(self): - link = self.prep_link_from_arguments(['--project-uuid', self.Z_UUID, - '--name', 'test link AAA']) - self.check_link(link, self.Z_UUID, 'test link AAA') - self.check_stderr_empty() - - def test_project_link_without_name(self): - username = pwd.getpwuid(os.getuid()).pw_name - link = self.prep_link_from_arguments(['--project-uuid', self.Z_UUID]) - self.assertIsNotNone(link.get('name', None)) - self.assertRegexpMatches( - link['name'], - r'^Saved at .* by {}@'.format(re.escape(username))) - self.check_link(link, self.Z_UUID, link.get('name', None)) - for line in self.stderr: - if "No --name specified" in line: - break - else: - self.fail("no warning emitted about the lack of collection name") - - @unittest.skip("prep_project_link needs an API lookup for this case") - def test_collection_without_project_defaults_to_home(self): - link = self.prep_link_from_arguments(['--name', 'test link BBB']) - self.check_link(link, self.Z_UUID) - self.check_stderr_empty() - - def test_no_link_or_warning_with_no_collection(self): - self.assertIsNone(self.prep_link_from_arguments(['--raw'])) - self.check_stderr_empty() - - def test_error_when_project_not_found(self): - self.assertRaises(ValueError, - self.prep_link_from_arguments, - ['--project-uuid', self.Z_UUID], False) - - def test_link_without_collection_is_error(self): - self.assertRaises(ValueError, - self.prep_link_from_arguments, - ['--project-uuid', self.Z_UUID, '--stream']) - - class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): MAIN_SERVER = {} + Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' def call_main_with_args(self, args): self.main_stdout = StringIO() @@ -454,11 +381,21 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): arv_put.ResumeCache.CACHE_DIR = orig_cachedir os.chmod(cachedir, 0o700) - def test_link_without_collection_aborts(self): + def test_error_name_without_collection(self): self.assertRaises(SystemExit, self.call_main_with_args, ['--name', 'test without Collection', '--stream', '/dev/null']) + def test_error_when_project_not_found(self): + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--project-uuid', self.Z_UUID]) + + def test_error_bad_project_uuid(self): + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--project-uuid', self.Z_UUID, '--stream']) + class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): def _getKeepServerConfig(): @@ -503,14 +440,14 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, def test_check_real_project_found(self): self.authorize_with('active') - self.assertTrue(arv_put.check_project_exists(self.PROJECT_UUID), + self.assertTrue(arv_put.check_project_exists(arv_put.api_client, self.PROJECT_UUID), "did not correctly find test fixture project") def test_check_error_finding_nonexistent_project(self): BAD_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' self.authorize_with('active') try: - result = arv_put.check_project_exists(BAD_UUID) + result = arv_put.check_project_exists(arv_put.api_client, BAD_UUID) except ValueError as error: self.assertIn(BAD_UUID, error.message) else: -- 2.39.5