X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6fc7bd0626e93dd20fc58167300186e9f8820638..dcdd0bd36ae76c154d12ffb2f3759cef15d4c8a9:/sdk/python/tests/test_arv_put.py diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index 642f64ec5a..eb97ebcfa8 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -8,7 +10,9 @@ from future import standard_library standard_library.install_aliases() from builtins import str from builtins import range +from functools import partial import apiclient +import ciso8601 import datetime import hashlib import json @@ -209,7 +213,7 @@ class ArvadosPutResumeCacheTest(ArvadosBaseTestCase): def test_cache_is_locked(self): with tempfile.NamedTemporaryFile() as cachefile: - cache = arv_put.ResumeCache(cachefile.name) + _ = arv_put.ResumeCache(cachefile.name) self.assertRaises(arv_put.ResumeCacheConflict, arv_put.ResumeCache, cachefile.name) @@ -291,6 +295,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, shutil.rmtree(self.tempdir_with_symlink) def test_symlinks_are_followed_by_default(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile'))) cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink]) cwriter.start(save_collection=False) self.assertIn('linkeddir', cwriter.manifest_text()) @@ -298,17 +304,34 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, cwriter.destroy_cache() def test_symlinks_are_not_followed_when_requested(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile'))) cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink], follow_links=False) cwriter.start(save_collection=False) self.assertNotIn('linkeddir', cwriter.manifest_text()) self.assertNotIn('linkedfile', cwriter.manifest_text()) cwriter.destroy_cache() + # Check for bug #17800: passed symlinks should also be ignored. + linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir') + cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False) + cwriter.start(save_collection=False) + self.assertNotIn('linkeddir', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_no_empty_collection_saved(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir') + cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False) + cwriter.start(save_collection=True) + self.assertIsNone(cwriter.manifest_locator()) + self.assertEqual('', cwriter.manifest_text()) + cwriter.destroy_cache() def test_passing_nonexistant_path_raise_exception(self): uuid_str = str(uuid.uuid4()) with self.assertRaises(arv_put.PathDoesNotExistError): - cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) + arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -528,6 +551,85 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, resume=False) del(self.writer) +class CachedManifestValidationTest(ArvadosBaseTestCase): + class MockedPut(arv_put.ArvPutUploadJob): + def __init__(self, cached_manifest=None): + self._state = arv_put.ArvPutUploadJob.EMPTY_STATE + self._state['manifest'] = cached_manifest + self._api_client = mock.MagicMock() + self.logger = mock.MagicMock() + self.num_retries = 1 + + def datetime_to_hex(self, dt): + return hex(int(time.mktime(dt.timetuple())))[2:] + + def setUp(self): + super(CachedManifestValidationTest, self).setUp() + self.block1 = "fdba98970961edb29f88241b9d99d890" # foo + self.block2 = "37b51d194a7513e45b56f6524f2d51f2" # bar + self.template = ". "+self.block1+"+3+Asignature@%s "+self.block2+"+3+Anothersignature@%s 0:3:foofile.txt 3:6:barfile.txt\n" + + def test_empty_cached_manifest_is_valid(self): + put_mock = self.MockedPut() + self.assertEqual(None, put_mock._state.get('manifest')) + self.assertTrue(put_mock._cached_manifest_valid()) + put_mock._state['manifest'] = '' + self.assertTrue(put_mock._cached_manifest_valid()) + + def test_signature_cases(self): + now = datetime.datetime.utcnow() + yesterday = now - datetime.timedelta(days=1) + lastweek = now - datetime.timedelta(days=7) + tomorrow = now + datetime.timedelta(days=1) + nextweek = now + datetime.timedelta(days=7) + + def mocked_head(blocks={}, loc=None): + blk = loc.split('+', 1)[0] + if blocks.get(blk): + return True + raise arvados.errors.KeepRequestError("mocked error - block invalid") + + # Block1_expiration, Block2_expiration, Block1_HEAD, Block2_HEAD, Expectation + cases = [ + # All expired, reset cache - OK + (yesterday, lastweek, False, False, True), + (lastweek, yesterday, False, False, True), + # All non-expired valid blocks - OK + (tomorrow, nextweek, True, True, True), + (nextweek, tomorrow, True, True, True), + # All non-expired invalid blocks - Not OK + (tomorrow, nextweek, False, False, False), + (nextweek, tomorrow, False, False, False), + # One non-expired valid block - OK + (tomorrow, yesterday, True, False, True), + (yesterday, tomorrow, False, True, True), + # One non-expired invalid block - Not OK + (tomorrow, yesterday, False, False, False), + (yesterday, tomorrow, False, False, False), + ] + for case in cases: + b1_expiration, b2_expiration, b1_valid, b2_valid, outcome = case + head_responses = { + self.block1: b1_valid, + self.block2: b2_valid, + } + cached_manifest = self.template % ( + self.datetime_to_hex(b1_expiration), + self.datetime_to_hex(b2_expiration), + ) + arvput = self.MockedPut(cached_manifest) + with mock.patch('arvados.collection.KeepClient.head') as head_mock: + head_mock.side_effect = partial(mocked_head, head_responses) + self.assertEqual(outcome, arvput._cached_manifest_valid(), + "Case '%s' should have produced outcome '%s'" % (case, outcome) + ) + if b1_expiration > now or b2_expiration > now: + # A HEAD request should have been done + head_mock.assert_called_once() + else: + head_mock.assert_not_called() + + class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) @@ -761,7 +863,7 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, fake_httplib2_response(403), b'{}') with mock.patch('arvados.collection.Collection.save_new', new=coll_save_mock): - with self.assertRaises(SystemExit) as exc_test: + with self.assertRaises(SystemExit): self.call_main_with_args(['/dev/null']) self.assertRegex( self.main_stderr.getvalue(), matcher) @@ -769,27 +871,8 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): - def _getKeepServerConfig(): - for config_file, mandatory in [ - ['application.yml', False], ['application.default.yml', True]]: - path = os.path.join(run_test_server.SERVICES_SRC_DIR, - "api", "config", config_file) - if not mandatory and not os.path.exists(path): - continue - with open(path) as f: - rails_config = yaml.load(f.read()) - for config_section in ['test', 'common']: - try: - key = rails_config[config_section]["blob_signing_key"] - except (KeyError, TypeError): - pass - else: - return {'blob_signing_key': key, - 'enforce_permissions': True} - return {'blog_signing_key': None, 'enforce_permissions': False} - MAIN_SERVER = {} - KEEP_SERVER = _getKeepServerConfig() + KEEP_SERVER = {'blob_signing': True} PROJECT_UUID = run_test_server.fixture('groups')['aproject']['uuid'] @classmethod @@ -836,7 +919,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, BAD_UUID = 'zzzzz-tpzed-zzzzzzzzzzzzzzz' self.authorize_with('active') with self.assertRaises(apiclient.errors.HttpError): - result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID, + arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID, 0) def test_short_put_from_stdin(self): @@ -848,7 +931,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, [sys.executable, arv_put.__file__, '--stream'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=self.ENVIRON) - pipe.stdin.write(b'stdin test\n') + pipe.stdin.write(b'stdin test\xa6\n') pipe.stdin.close() deadline = time.time() + 5 while (pipe.poll() is None) and (time.time() < deadline): @@ -860,7 +943,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, elif returncode != 0: sys.stdout.write(pipe.stdout.read()) self.fail("arv-put returned exit code {}".format(returncode)) - self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', + self.assertIn('1cb671b355a0c23d5d1c61d59cdb1b2b+12', pipe.stdout.read().decode()) def test_sigint_logs_request_id(self): @@ -897,7 +980,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, # we're about to create is not present in our test fixture. manifest_uuid = "00b4e9f40ac4dd432ef89749f1c01e74+47" with self.assertRaises(apiclient.errors.HttpError): - notfound = arv_put.api_client.collections().get( + arv_put.api_client.collections().get( uuid=manifest_uuid).execute() datadir = self.make_tmpdir() @@ -908,7 +991,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex(err.decode(), r'INFO: Collection saved as ') self.assertEqual(p.returncode, 0) @@ -948,7 +1031,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') self.assertEqual(p.returncode, 0) cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', @@ -971,7 +1054,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex( err.decode(), r'INFO: Cache expired, starting from scratch.*') @@ -987,7 +1070,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') self.assertEqual(p.returncode, 0) cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', @@ -1009,10 +1092,10 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex( err.decode(), - r'ERROR: arv-put: Cache seems to contain invalid data.*') + r'ERROR: arv-put: Resume cache contains invalid signature.*') self.assertEqual(p.returncode, 1) def test_single_expired_signature_reuploads_file(self): @@ -1029,7 +1112,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') self.assertEqual(p.returncode, 0) cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', @@ -1053,7 +1136,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - (out, err) = p.communicate() + (_, err) = p.communicate() self.assertRegex( err.decode(), r'WARNING: Uploaded file \'.*barfile.txt\' access token expired, will re-upload it from scratch') @@ -1078,6 +1161,107 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute() self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n') + def test_put_collection_with_utc_expiring_datetime(self): + tmpdir = self.make_tmpdir() + trash_at = (datetime.datetime.utcnow() + datetime.timedelta(days=90)).strftime('%Y%m%dT%H%MZ') + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + col = self.run_and_find_collection( + "", + ['--no-progress', '--trash-at', trash_at, tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + self.assertEqual(ciso8601.parse_datetime(trash_at), + ciso8601.parse_datetime(c['trash_at'])) + + def test_put_collection_with_timezone_aware_expiring_datetime(self): + tmpdir = self.make_tmpdir() + trash_at = (datetime.datetime.utcnow() + datetime.timedelta(days=90)).strftime('%Y%m%dT%H%M-0300') + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + col = self.run_and_find_collection( + "", + ['--no-progress', '--trash-at', trash_at, tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + self.assertEqual( + ciso8601.parse_datetime(trash_at).replace(tzinfo=None) + datetime.timedelta(hours=3), + ciso8601.parse_datetime(c['trash_at']).replace(tzinfo=None)) + + def test_put_collection_with_timezone_naive_expiring_datetime(self): + tmpdir = self.make_tmpdir() + trash_at = (datetime.datetime.utcnow() + datetime.timedelta(days=90)).strftime('%Y%m%dT%H%M') + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + col = self.run_and_find_collection( + "", + ['--no-progress', '--trash-at', trash_at, tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + if time.daylight: + offset = datetime.timedelta(seconds=time.altzone) + else: + offset = datetime.timedelta(seconds=time.timezone) + self.assertEqual( + ciso8601.parse_datetime(trash_at) + offset, + ciso8601.parse_datetime(c['trash_at']).replace(tzinfo=None)) + + def test_put_collection_with_expiring_date_only(self): + tmpdir = self.make_tmpdir() + trash_at = '2140-01-01' + end_of_day = datetime.timedelta(hours=23, minutes=59, seconds=59) + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + col = self.run_and_find_collection( + "", + ['--no-progress', '--trash-at', trash_at, tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + if time.daylight: + offset = datetime.timedelta(seconds=time.altzone) + else: + offset = datetime.timedelta(seconds=time.timezone) + self.assertEqual( + ciso8601.parse_datetime(trash_at) + end_of_day + offset, + ciso8601.parse_datetime(c['trash_at']).replace(tzinfo=None)) + + def test_put_collection_with_invalid_absolute_expiring_datetimes(self): + cases = ['2100', '210010','2100-10', '2100-Oct'] + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + for test_datetime in cases: + with self.assertRaises(AssertionError): + self.run_and_find_collection( + "", + ['--no-progress', '--trash-at', test_datetime, tmpdir]) + + def test_put_collection_with_relative_expiring_datetime(self): + expire_after = 7 + dt_before = datetime.datetime.utcnow() + datetime.timedelta(days=expire_after) + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + col = self.run_and_find_collection( + "", + ['--no-progress', '--trash-after', str(expire_after), tmpdir]) + self.assertNotEqual(None, col['uuid']) + dt_after = datetime.datetime.utcnow() + datetime.timedelta(days=expire_after) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + trash_at = ciso8601.parse_datetime(c['trash_at']).replace(tzinfo=None) + self.assertTrue(dt_before < trash_at) + self.assertTrue(dt_after > trash_at) + + def test_put_collection_with_invalid_relative_expiring_datetime(self): + expire_after = 0 # Must be >= 1 + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'file1'), 'w') as f: + f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box') + with self.assertRaises(AssertionError): + self.run_and_find_collection( + "", + ['--no-progress', '--trash-after', str(expire_after), tmpdir]) + def test_upload_directory_reference_without_trailing_slash(self): tmpdir1 = self.make_tmpdir() tmpdir2 = self.make_tmpdir() @@ -1204,6 +1388,16 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + def test_unicode_on_filename(self): + tmpdir = self.make_tmpdir() + fname = u"i❤arvados.txt" + with open(os.path.join(tmpdir, fname), 'w') as f: + f.write("This is a unicode named file") + col = self.run_and_find_collection("", ['--no-progress', tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + self.assertTrue(fname in c['manifest_text'], u"{} does not include {}".format(c['manifest_text'], fname)) + def test_silent_mode_no_errors(self): self.authorize_with('active') tmpdir = self.make_tmpdir()