X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e61df48303291900581ef4a64dcf97864598c5f4..0eb72b526bf8bbb011551ecf019f604e17a534f1:/sdk/python/tests/test_arv_put.py diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index 5abf38854a..b8065ef3aa 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 from __future__ import absolute_import from __future__ import division @@ -7,9 +8,7 @@ from future import standard_library standard_library.install_aliases() from builtins import str from builtins import range -from past.utils import old_div import apiclient -import io import mock import os import pwd @@ -24,8 +23,7 @@ import yaml import threading import hashlib import random - -from io import StringIO +import uuid import arvados import arvados.commands.put as arv_put @@ -264,8 +262,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, _, self.large_file_name = tempfile.mkstemp() fileobj = open(self.large_file_name, 'w') # Make sure to write just a little more than one block - for _ in range((old_div(arvados.config.KEEP_BLOCK_SIZE,(1024*1024)))+1): - data = random.choice(['x', 'y', 'z']) * 1024 * 1024 # 1 MB + for _ in range((arvados.config.KEEP_BLOCK_SIZE>>20)+1): + data = random.choice(['x', 'y', 'z']) * 1024 * 1024 # 1 MiB fileobj.write(data) fileobj.close() # Temp dir containing small files to be repacked @@ -275,12 +273,38 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, with open(os.path.join(self.small_files_dir, str(i)), 'w') as f: f.write(data + str(i)) self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write') + # Temp dir to hold a symlink to other temp dir + self.tempdir_with_symlink = tempfile.mkdtemp() + os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir')) + os.symlink(os.path.join(self.tempdir, '1'), + os.path.join(self.tempdir_with_symlink, 'linkedfile')) def tearDown(self): super(ArvPutUploadJobTest, self).tearDown() shutil.rmtree(self.tempdir) os.unlink(self.large_file_name) shutil.rmtree(self.small_files_dir) + shutil.rmtree(self.tempdir_with_symlink) + + def test_symlinks_are_followed_by_default(self): + cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink]) + cwriter.start(save_collection=False) + self.assertIn('linkeddir', cwriter.manifest_text()) + self.assertIn('linkedfile', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_symlinks_are_not_followed_when_requested(self): + cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink], + follow_links=False) + cwriter.start(save_collection=False) + self.assertNotIn('linkeddir', cwriter.manifest_text()) + self.assertNotIn('linkedfile', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_passing_nonexistant_path_raise_exception(self): + uuid_str = str(uuid.uuid4()) + with self.assertRaises(arv_put.PathDoesNotExistError): + cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -289,16 +313,18 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, def test_writer_works_with_cache(self): with tempfile.NamedTemporaryFile() as f: - f.write('foo') + f.write(b'foo') f.flush() cwriter = arv_put.ArvPutUploadJob([f.name]) cwriter.start(save_collection=False) - self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped) + self.assertEqual(0, cwriter.bytes_skipped) + self.assertEqual(3, cwriter.bytes_written) # Don't destroy the cache, and start another upload cwriter_new = arv_put.ArvPutUploadJob([f.name]) cwriter_new.start(save_collection=False) cwriter_new.destroy_cache() - self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped) + self.assertEqual(3, cwriter_new.bytes_skipped) + self.assertEqual(3, cwriter_new.bytes_written) def make_progress_tester(self): progression = [] @@ -308,12 +334,13 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, def test_progress_reporting(self): with tempfile.NamedTemporaryFile() as f: - f.write('foo') + f.write(b'foo') f.flush() for expect_count in (None, 8): progression, reporter = self.make_progress_tester() cwriter = arv_put.ArvPutUploadJob([f.name], - reporter=reporter, bytes_expected=expect_count) + reporter=reporter) + cwriter.bytes_expected = expect_count cwriter.start(save_collection=False) cwriter.destroy_cache() self.assertIn((3, expect_count), progression) @@ -469,23 +496,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, self.assertGreater(writer.bytes_written, 0) self.assertLess(writer.bytes_written, os.path.getsize(self.large_file_name)) - # Retry the upload using dry_run to check if there is a pending upload - writer2 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadIsPending): - writer2.start(save_collection=False) + # Retry the upload using dry_run to check if there is a pending upload + writer2 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Complete the pending upload writer3 = arv_put.ArvPutUploadJob([self.large_file_name], replication_desired=1) writer3.start(save_collection=False) - # Confirm there's no pending upload with dry_run=True - writer4 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadNotPending): - writer4.start(save_collection=False) - writer4.destroy_cache() + # Confirm there's no pending upload with dry_run=True + writer4 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Test obvious cases with self.assertRaises(arv_put.ArvPutUploadIsPending): arv_put.ArvPutUploadJob([self.large_file_name], @@ -504,21 +528,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) def test_expected_bytes_for_file(self): + writer = arv_put.ArvPutUploadJob([__file__]) self.assertEqual(self.TEST_SIZE, - arv_put.expected_bytes_for([__file__])) + writer.bytes_expected) def test_expected_bytes_for_tree(self): tree = self.make_tmpdir() shutil.copyfile(__file__, os.path.join(tree, 'one')) shutil.copyfile(__file__, os.path.join(tree, 'two')) + + writer = arv_put.ArvPutUploadJob([tree]) self.assertEqual(self.TEST_SIZE * 2, - arv_put.expected_bytes_for([tree])) + writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([tree, __file__]) self.assertEqual(self.TEST_SIZE * 3, - arv_put.expected_bytes_for([tree, __file__])) + writer.bytes_expected) def test_expected_bytes_for_device(self): - self.assertIsNone(arv_put.expected_bytes_for(['/dev/null'])) - self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null'])) + writer = arv_put.ArvPutUploadJob(['/dev/null']) + self.assertIsNone(writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([__file__, '/dev/null']) + self.assertIsNone(writer.bytes_expected) class ArvadosPutReportTest(ArvadosBaseTestCase): @@ -531,7 +561,7 @@ class ArvadosPutReportTest(ArvadosBaseTestCase): def test_known_human_progress(self): for count, total in [(0, 1), (2, 4), (45, 60)]: - expect = '{:.1%}'.format(old_div(float(count), total)) + expect = '{:.1%}'.format(1.0*count/total) actual = arv_put.human_progress(count, total) self.assertTrue(actual.startswith('\r')) self.assertIn(expect, actual) @@ -542,13 +572,15 @@ class ArvadosPutReportTest(ArvadosBaseTestCase): arv_put.human_progress(count, None))) -class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): +class ArvadosPutTest(run_test_server.TestCaseWithServers, + ArvadosBaseTestCase, + tutil.VersionChecker): MAIN_SERVER = {} Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' def call_main_with_args(self, args): - self.main_stdout = StringIO() - self.main_stderr = StringIO() + self.main_stdout = tutil.StringIO() + self.main_stderr = tutil.StringIO() return arv_put.main(args, self.main_stdout, self.main_stderr) def call_main_on_test_file(self, args=[]): @@ -573,13 +605,11 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): super(ArvadosPutTest, self).tearDown() def test_version_argument(self): - err = io.BytesIO() - out = io.BytesIO() - with tutil.redirected_streams(stdout=out, stderr=err): + with tutil.redirected_streams( + stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err): with self.assertRaises(SystemExit): self.call_main_with_args(['--version']) - self.assertEqual(out.getvalue(), '') - self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+") + self.assertVersionOutput(out, err) def test_simple_file_put(self): self.call_main_on_test_file() @@ -646,10 +676,17 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) + def test_error_when_excluding_absolute_path(self): + tmpdir = self.make_tmpdir() + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--exclude', '/some/absolute/path/*', + tmpdir]) + def test_api_error_handling(self): coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') coll_save_mock.side_effect = arvados.errors.ApiError( - fake_httplib2_response(403), '{}') + fake_httplib2_response(403), b'{}') with mock.patch('arvados.collection.Collection.save_new', new=coll_save_mock): with self.assertRaises(SystemExit) as exc_test: @@ -717,7 +754,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID, 0) except ValueError as error: - self.assertIn(BAD_UUID, error.message) + self.assertIn(BAD_UUID, str(error)) else: self.assertFalse(result, "incorrectly found nonexistent project") @@ -737,7 +774,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, [sys.executable, arv_put.__file__, '--stream'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=self.ENVIRON) - pipe.stdin.write('stdin test\n') + pipe.stdin.write(b'stdin test\n') pipe.stdin.close() deadline = time.time() + 5 while (pipe.poll() is None) and (time.time() < deadline): @@ -749,7 +786,8 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, elif returncode != 0: sys.stdout.write(pipe.stdout.read()) self.fail("arv-put returned exit code {}".format(returncode)) - self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', pipe.stdout.read()) + self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', + pipe.stdout.read().decode()) def test_ArvPutSignedManifest(self): # ArvPutSignedManifest runs "arv-put foo" and then attempts to get @@ -767,16 +805,19 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, datadir = self.make_tmpdir() with open(os.path.join(datadir, "foo"), "w") as f: f.write("The quick brown fox jumped over the lazy dog") - p = subprocess.Popen([sys.executable, arv_put.__file__, datadir], - stdout=subprocess.PIPE, env=self.ENVIRON) - (arvout, arverr) = p.communicate() - self.assertEqual(arverr, None) + p = subprocess.Popen([sys.executable, arv_put.__file__, + os.path.join(datadir, 'foo')], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex(err.decode(), r'INFO: Collection saved as ') self.assertEqual(p.returncode, 0) # The manifest text stored in the API server under the same # manifest UUID must use signed locators. c = arv_put.api_client.collections().get(uuid=manifest_uuid).execute() - self.assertRegexpMatches( + self.assertRegex( c['manifest_text'], r'^\. 08a008a01d498c404b0c30852b39d3b8\+44\+A[0-9a-f]+@[0-9a-f]+ 0:44:foo\n') @@ -789,11 +830,13 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, [sys.executable, arv_put.__file__] + extra_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=self.ENVIRON) - stdout, stderr = pipe.communicate(text) + stdout, stderr = pipe.communicate(text.encode()) + self.assertRegex(stderr.decode(), r'INFO: Collection (updated:|saved as)') search_key = ('portable_data_hash' if '--portable-data-hash' in extra_args else 'uuid') collection_list = arvados.api('v1').collections().list( - filters=[[search_key, '=', stdout.strip()]]).execute().get('items', []) + filters=[[search_key, '=', stdout.decode().strip()]] + ).execute().get('items', []) self.assertEqual(1, len(collection_list)) return collection_list[0] @@ -810,7 +853,40 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, self.assertEqual(col['uuid'], updated_col['uuid']) # Get the manifest and check that the new file is being included c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute() - self.assertRegexpMatches(c['manifest_text'], r'^\. .*:44:file2\n') + self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n') + + def test_upload_directory_reference_without_trailing_slash(self): + tmpdir1 = self.make_tmpdir() + tmpdir2 = self.make_tmpdir() + with open(os.path.join(tmpdir1, 'foo'), 'w') as f: + f.write('This is foo') + with open(os.path.join(tmpdir2, 'bar'), 'w') as f: + f.write('This is not foo') + # Upload one directory and one file + col = self.run_and_find_collection("", ['--no-progress', + tmpdir1, + os.path.join(tmpdir2, 'bar')]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Check that 'foo' was written inside a subcollection + # OTOH, 'bar' should have been directly uploaded on the root collection + self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n') + + def test_upload_directory_reference_with_trailing_slash(self): + tmpdir1 = self.make_tmpdir() + tmpdir2 = self.make_tmpdir() + with open(os.path.join(tmpdir1, 'foo'), 'w') as f: + f.write('This is foo') + with open(os.path.join(tmpdir2, 'bar'), 'w') as f: + f.write('This is not foo') + # Upload one directory (with trailing slash) and one file + col = self.run_and_find_collection("", ['--no-progress', + tmpdir1 + os.sep, + os.path.join(tmpdir2, 'bar')]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Check that 'foo' and 'bar' were written at the same level + self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n') def test_put_collection_with_high_redundancy(self): # Write empty data: we're not testing CollectionWriter, just @@ -828,7 +904,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, "Test unnamed collection", ['--portable-data-hash', '--project-uuid', self.PROJECT_UUID]) username = pwd.getpwuid(os.getuid()).pw_name - self.assertRegexpMatches( + self.assertRegex( link['name'], r'^Saved at .* by {}@'.format(re.escape(username))) @@ -849,6 +925,50 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, '--project-uuid', self.PROJECT_UUID]) self.assertEqual(link_name, collection['name']) + def test_exclude_filename_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', '*2.txt', + '--exclude', 'file3.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # None of the file2.txt & file3.txt should have been uploaded + self.assertRegex(c['manifest_text'], r'^.*:file1.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_exclude_filepath_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', 'subdir/*2.txt', + '--exclude', './file1.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded + self.assertNotRegex(c['manifest_text'], + r'^\./%s.*:file1.txt' % os.path.basename(tmpdir)) + self.assertNotRegex(c['manifest_text'], + r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], + r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + if __name__ == '__main__': unittest.main()