X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b1f94004653da79774a8c9afd641ab7b492398a6..688319dca9cbbb27452a0062943bb38b6935254e:/sdk/python/tests/test_arv_put.py diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index c2eaf12bbb..93cfdc2a36 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + from __future__ import absolute_import from __future__ import division from future import standard_library @@ -5,21 +9,25 @@ standard_library.install_aliases() from builtins import str from builtins import range import apiclient +import datetime +import hashlib +import json +import logging import mock import os import pwd +import random import re +import select import shutil +import signal import subprocess import sys import tempfile import time import unittest -import yaml -import threading -import hashlib -import random import uuid +import yaml import arvados import arvados.commands.put as arv_put @@ -166,7 +174,7 @@ class ArvadosPutResumeCacheTest(ArvadosBaseTestCase): self.last_cache.close() resume_cache = arv_put.ResumeCache(self.last_cache.filename) self.assertNotEqual(None, resume_cache) - self.assertRaises(None, resume_cache.check_cache()) + resume_cache.check_cache() def test_basic_cache_storage(self): thing = ['test', 'list'] @@ -299,9 +307,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, def test_passing_nonexistant_path_raise_exception(self): uuid_str = str(uuid.uuid4()) - cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) with self.assertRaises(arv_put.PathDoesNotExistError): - cwriter.start(save_collection=False) + cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -336,7 +343,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, for expect_count in (None, 8): progression, reporter = self.make_progress_tester() cwriter = arv_put.ArvPutUploadJob([f.name], - reporter=reporter, bytes_expected=expect_count) + reporter=reporter) + cwriter.bytes_expected = expect_count cwriter.start(save_collection=False) cwriter.destroy_cache() self.assertIn((3, expect_count), progression) @@ -492,23 +500,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, self.assertGreater(writer.bytes_written, 0) self.assertLess(writer.bytes_written, os.path.getsize(self.large_file_name)) - # Retry the upload using dry_run to check if there is a pending upload - writer2 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadIsPending): - writer2.start(save_collection=False) + # Retry the upload using dry_run to check if there is a pending upload + writer2 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Complete the pending upload writer3 = arv_put.ArvPutUploadJob([self.large_file_name], replication_desired=1) writer3.start(save_collection=False) - # Confirm there's no pending upload with dry_run=True - writer4 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadNotPending): - writer4.start(save_collection=False) - writer4.destroy_cache() + # Confirm there's no pending upload with dry_run=True + writer4 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Test obvious cases with self.assertRaises(arv_put.ArvPutUploadIsPending): arv_put.ArvPutUploadJob([self.large_file_name], @@ -527,21 +532,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) def test_expected_bytes_for_file(self): + writer = arv_put.ArvPutUploadJob([__file__]) self.assertEqual(self.TEST_SIZE, - arv_put.expected_bytes_for([__file__])) + writer.bytes_expected) def test_expected_bytes_for_tree(self): tree = self.make_tmpdir() shutil.copyfile(__file__, os.path.join(tree, 'one')) shutil.copyfile(__file__, os.path.join(tree, 'two')) + + writer = arv_put.ArvPutUploadJob([tree]) self.assertEqual(self.TEST_SIZE * 2, - arv_put.expected_bytes_for([tree])) + writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([tree, __file__]) self.assertEqual(self.TEST_SIZE * 3, - arv_put.expected_bytes_for([tree, __file__])) + writer.bytes_expected) def test_expected_bytes_for_device(self): - self.assertIsNone(arv_put.expected_bytes_for(['/dev/null'])) - self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null'])) + writer = arv_put.ArvPutUploadJob(['/dev/null']) + self.assertIsNone(writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([__file__, '/dev/null']) + self.assertIsNone(writer.bytes_expected) class ArvadosPutReportTest(ArvadosBaseTestCase): @@ -565,6 +576,47 @@ class ArvadosPutReportTest(ArvadosBaseTestCase): arv_put.human_progress(count, None))) +class ArvPutLogFormatterTest(ArvadosBaseTestCase): + matcher = r'\(X-Request-Id: req-[a-z0-9]{20}\)' + + def setUp(self): + super(ArvPutLogFormatterTest, self).setUp() + self.stderr = tutil.StringIO() + self.loggingHandler = logging.StreamHandler(self.stderr) + self.loggingHandler.setFormatter( + arv_put.ArvPutLogFormatter(arvados.util.new_request_id())) + self.logger = logging.getLogger() + self.logger.addHandler(self.loggingHandler) + self.logger.setLevel(logging.DEBUG) + + def tearDown(self): + self.logger.removeHandler(self.loggingHandler) + self.stderr.close() + self.stderr = None + super(ArvPutLogFormatterTest, self).tearDown() + + def test_request_id_logged_only_once_on_error(self): + self.logger.error('Ooops, something bad happened.') + self.logger.error('Another bad thing just happened.') + log_lines = self.stderr.getvalue().split('\n')[:-1] + self.assertEqual(2, len(log_lines)) + self.assertRegex(log_lines[0], self.matcher) + self.assertNotRegex(log_lines[1], self.matcher) + + def test_request_id_logged_only_once_on_debug(self): + self.logger.debug('This is just a debug message.') + self.logger.debug('Another message, move along.') + log_lines = self.stderr.getvalue().split('\n')[:-1] + self.assertEqual(2, len(log_lines)) + self.assertRegex(log_lines[0], self.matcher) + self.assertNotRegex(log_lines[1], self.matcher) + + def test_request_id_not_logged_on_info(self): + self.logger.info('This should be a useful message') + log_lines = self.stderr.getvalue().split('\n')[:-1] + self.assertEqual(1, len(log_lines)) + self.assertNotRegex(log_lines[0], self.matcher) + class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase, tutil.VersionChecker): @@ -572,8 +624,10 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' def call_main_with_args(self, args): - self.main_stdout = tutil.StringIO() - self.main_stderr = tutil.StringIO() + self.main_stdout.seek(0, 0) + self.main_stdout.truncate(0) + self.main_stderr.seek(0, 0) + self.main_stderr.truncate(0) return arv_put.main(args, self.main_stdout, self.main_stderr) def call_main_on_test_file(self, args=[]): @@ -589,8 +643,15 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, super(ArvadosPutTest, self).setUp() run_test_server.authorize_with('active') arv_put.api_client = None + self.main_stdout = tutil.StringIO() + self.main_stderr = tutil.StringIO() + self.loggingHandler = logging.StreamHandler(self.main_stderr) + self.loggingHandler.setFormatter( + arv_put.ArvPutLogFormatter(arvados.util.new_request_id())) + logging.getLogger().addHandler(self.loggingHandler) def tearDown(self): + logging.getLogger().removeHandler(self.loggingHandler) for outbuf in ['main_stdout', 'main_stderr']: if hasattr(self, outbuf): getattr(self, outbuf).close() @@ -669,6 +730,18 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) + def test_error_when_multiple_storage_classes_specified(self): + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--storage-classes', 'hot,cold']) + + def test_error_when_excluding_absolute_path(self): + tmpdir = self.make_tmpdir() + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--exclude', '/some/absolute/path/*', + tmpdir]) + def test_api_error_handling(self): coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') coll_save_mock.side_effect = arvados.errors.ApiError( @@ -681,6 +754,18 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.assertLess(0, coll_save_mock.call_count) self.assertEqual("", self.main_stdout.getvalue()) + def test_request_id_logging_on_error(self): + matcher = r'\(X-Request-Id: req-[a-z0-9]{20}\)\n' + coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') + coll_save_mock.side_effect = arvados.errors.ApiError( + fake_httplib2_response(403), b'{}') + with mock.patch('arvados.collection.Collection.save_new', + new=coll_save_mock): + with self.assertRaises(SystemExit) as exc_test: + self.call_main_with_args(['/dev/null']) + self.assertRegex( + self.main_stderr.getvalue(), matcher) + class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase): @@ -713,6 +798,9 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, cls.ENVIRON = os.environ.copy() cls.ENVIRON['PYTHONPATH'] = ':'.join(sys.path) + def datetime_to_hex(self, dt): + return hex(int(time.mktime(dt.timetuple())))[2:] + def setUp(self): super(ArvPutIntegrationTest, self).setUp() arv_put.api_client = None @@ -775,6 +863,30 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', pipe.stdout.read().decode()) + def test_sigint_logs_request_id(self): + # Start arv-put, give it a chance to start up, send SIGINT, + # and check that its output includes the X-Request-Id. + input_stream = subprocess.Popen( + ['sleep', '10'], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pipe = subprocess.Popen( + [sys.executable, arv_put.__file__, '--stream'], + stdin=input_stream.stdout, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, env=self.ENVIRON) + # Wait for arv-put child process to print something (i.e., a + # log message) so we know its signal handler is installed. + select.select([pipe.stdout], [], [], 10) + pipe.send_signal(signal.SIGINT) + deadline = time.time() + 5 + while (pipe.poll() is None) and (time.time() < deadline): + time.sleep(.1) + returncode = pipe.poll() + input_stream.terminate() + if returncode is None: + pipe.terminate() + self.fail("arv-put did not exit within 5 seconds") + self.assertRegex(pipe.stdout.read().decode(), r'\(X-Request-Id: req-[a-z0-9]{20}\)') + def test_ArvPutSignedManifest(self): # ArvPutSignedManifest runs "arv-put foo" and then attempts to get # the newly created manifest from the API server, testing to confirm @@ -826,6 +938,49 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, self.assertEqual(1, len(collection_list)) return collection_list[0] + def test_expired_token_invalidates_cache(self): + self.authorize_with('active') + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f: + f.write('foo') + # Upload a directory and get the cache file name + p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') + self.assertEqual(p.returncode, 0) + cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', + err.decode()).groups()[0] + self.assertTrue(os.path.isfile(cache_filepath)) + # Load the cache file contents and modify the manifest to simulate + # an expired access token + with open(cache_filepath, 'r') as c: + cache = json.load(c) + self.assertRegex(cache['manifest'], r'\+A\S+\@') + a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30) + cache['manifest'] = re.sub( + r'\@.*? ', + "@{} ".format(self.datetime_to_hex(a_month_ago)), + cache['manifest']) + with open(cache_filepath, 'w') as c: + c.write(json.dumps(cache)) + # Re-run the upload and expect to get an invalid cache message + p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex( + err.decode(), + r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch') + self.assertEqual(p.returncode, 0) + # Confirm that the resulting cache is different from the last run. + with open(cache_filepath, 'r') as c2: + new_cache = json.load(c2) + self.assertNotEqual(cache['manifest'], new_cache['manifest']) + def test_put_collection_with_later_update(self): tmpdir = self.make_tmpdir() with open(os.path.join(tmpdir, 'file1'), 'w') as f: @@ -911,6 +1066,88 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, '--project-uuid', self.PROJECT_UUID]) self.assertEqual(link_name, collection['name']) + def test_put_collection_with_storage_classes_specified(self): + collection = self.run_and_find_collection("", ['--storage-classes', 'hot']) + + self.assertEqual(len(collection['storage_classes_desired']), 1) + self.assertEqual(collection['storage_classes_desired'][0], 'hot') + + def test_put_collection_without_storage_classes_specified(self): + collection = self.run_and_find_collection("") + + self.assertEqual(len(collection['storage_classes_desired']), 1) + self.assertEqual(collection['storage_classes_desired'][0], 'default') + + def test_exclude_filename_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', '*2.txt', + '--exclude', 'file3.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # None of the file2.txt & file3.txt should have been uploaded + self.assertRegex(c['manifest_text'], r'^.*:file1.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_exclude_filepath_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', 'subdir/*2.txt', + '--exclude', './file1.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded + self.assertNotRegex(c['manifest_text'], + r'^\./%s.*:file1.txt' % os.path.basename(tmpdir)) + self.assertNotRegex(c['manifest_text'], + r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], + r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_silent_mode_no_errors(self): + self.authorize_with('active') + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'test.txt'), 'w') as f: + f.write('hello world') + pipe = subprocess.Popen( + [sys.executable, arv_put.__file__] + ['--silent', tmpdir], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=self.ENVIRON) + stdout, stderr = pipe.communicate() + # No console output should occur on normal operations + self.assertNotRegex(stderr.decode(), r'.+') + self.assertNotRegex(stdout.decode(), r'.+') + + def test_silent_mode_does_not_avoid_error_messages(self): + self.authorize_with('active') + pipe = subprocess.Popen( + [sys.executable, arv_put.__file__] + ['--silent', + '/path/not/existant'], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=self.ENVIRON) + stdout, stderr = pipe.communicate() + # Error message should be displayed when errors happen + self.assertRegex(stderr.decode(), r'.*ERROR:.*') + self.assertNotRegex(stdout.decode(), r'.+') + if __name__ == '__main__': unittest.main()