X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c633f38b9b53c30082407b1f1d9d8c7738c9d680..f873463bfea88575f98e7a36b26ffbae9aa28aa6:/sdk/python/tests/test_arv_put.py diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index e0ff7a772d..346167846c 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 from __future__ import absolute_import from __future__ import division @@ -8,20 +9,23 @@ standard_library.install_aliases() from builtins import str from builtins import range import apiclient +import datetime +import hashlib +import json import mock import os import pwd +import random import re import shutil import subprocess import sys import tempfile +import threading import time import unittest +import uuid import yaml -import threading -import hashlib -import random import arvados import arvados.commands.put as arv_put @@ -168,7 +172,7 @@ class ArvadosPutResumeCacheTest(ArvadosBaseTestCase): self.last_cache.close() resume_cache = arv_put.ResumeCache(self.last_cache.filename) self.assertNotEqual(None, resume_cache) - self.assertRaises(None, resume_cache.check_cache()) + resume_cache.check_cache() def test_basic_cache_storage(self): thing = ['test', 'list'] @@ -271,12 +275,38 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, with open(os.path.join(self.small_files_dir, str(i)), 'w') as f: f.write(data + str(i)) self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write') + # Temp dir to hold a symlink to other temp dir + self.tempdir_with_symlink = tempfile.mkdtemp() + os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir')) + os.symlink(os.path.join(self.tempdir, '1'), + os.path.join(self.tempdir_with_symlink, 'linkedfile')) def tearDown(self): super(ArvPutUploadJobTest, self).tearDown() shutil.rmtree(self.tempdir) os.unlink(self.large_file_name) shutil.rmtree(self.small_files_dir) + shutil.rmtree(self.tempdir_with_symlink) + + def test_symlinks_are_followed_by_default(self): + cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink]) + cwriter.start(save_collection=False) + self.assertIn('linkeddir', cwriter.manifest_text()) + self.assertIn('linkedfile', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_symlinks_are_not_followed_when_requested(self): + cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink], + follow_links=False) + cwriter.start(save_collection=False) + self.assertNotIn('linkeddir', cwriter.manifest_text()) + self.assertNotIn('linkedfile', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_passing_nonexistant_path_raise_exception(self): + uuid_str = str(uuid.uuid4()) + with self.assertRaises(arv_put.PathDoesNotExistError): + cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -289,12 +319,14 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, f.flush() cwriter = arv_put.ArvPutUploadJob([f.name]) cwriter.start(save_collection=False) - self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped) + self.assertEqual(0, cwriter.bytes_skipped) + self.assertEqual(3, cwriter.bytes_written) # Don't destroy the cache, and start another upload cwriter_new = arv_put.ArvPutUploadJob([f.name]) cwriter_new.start(save_collection=False) cwriter_new.destroy_cache() - self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped) + self.assertEqual(3, cwriter_new.bytes_skipped) + self.assertEqual(3, cwriter_new.bytes_written) def make_progress_tester(self): progression = [] @@ -309,7 +341,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, for expect_count in (None, 8): progression, reporter = self.make_progress_tester() cwriter = arv_put.ArvPutUploadJob([f.name], - reporter=reporter, bytes_expected=expect_count) + reporter=reporter) + cwriter.bytes_expected = expect_count cwriter.start(save_collection=False) cwriter.destroy_cache() self.assertIn((3, expect_count), progression) @@ -465,23 +498,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, self.assertGreater(writer.bytes_written, 0) self.assertLess(writer.bytes_written, os.path.getsize(self.large_file_name)) - # Retry the upload using dry_run to check if there is a pending upload - writer2 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadIsPending): - writer2.start(save_collection=False) + # Retry the upload using dry_run to check if there is a pending upload + writer2 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Complete the pending upload writer3 = arv_put.ArvPutUploadJob([self.large_file_name], replication_desired=1) writer3.start(save_collection=False) - # Confirm there's no pending upload with dry_run=True - writer4 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadNotPending): - writer4.start(save_collection=False) - writer4.destroy_cache() + # Confirm there's no pending upload with dry_run=True + writer4 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Test obvious cases with self.assertRaises(arv_put.ArvPutUploadIsPending): arv_put.ArvPutUploadJob([self.large_file_name], @@ -500,21 +530,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) def test_expected_bytes_for_file(self): + writer = arv_put.ArvPutUploadJob([__file__]) self.assertEqual(self.TEST_SIZE, - arv_put.expected_bytes_for([__file__])) + writer.bytes_expected) def test_expected_bytes_for_tree(self): tree = self.make_tmpdir() shutil.copyfile(__file__, os.path.join(tree, 'one')) shutil.copyfile(__file__, os.path.join(tree, 'two')) + + writer = arv_put.ArvPutUploadJob([tree]) self.assertEqual(self.TEST_SIZE * 2, - arv_put.expected_bytes_for([tree])) + writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([tree, __file__]) self.assertEqual(self.TEST_SIZE * 3, - arv_put.expected_bytes_for([tree, __file__])) + writer.bytes_expected) def test_expected_bytes_for_device(self): - self.assertIsNone(arv_put.expected_bytes_for(['/dev/null'])) - self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null'])) + writer = arv_put.ArvPutUploadJob(['/dev/null']) + self.assertIsNone(writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([__file__, '/dev/null']) + self.assertIsNone(writer.bytes_expected) class ArvadosPutReportTest(ArvadosBaseTestCase): @@ -642,6 +678,13 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) + def test_error_when_excluding_absolute_path(self): + tmpdir = self.make_tmpdir() + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--exclude', '/some/absolute/path/*', + tmpdir]) + def test_api_error_handling(self): coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') coll_save_mock.side_effect = arvados.errors.ApiError( @@ -686,6 +729,9 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, cls.ENVIRON = os.environ.copy() cls.ENVIRON['PYTHONPATH'] = ':'.join(sys.path) + def datetime_to_hex(self, dt): + return hex(int(time.mktime(dt.timetuple())))[2:] + def setUp(self): super(ArvPutIntegrationTest, self).setUp() arv_put.api_client = None @@ -764,10 +810,13 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, datadir = self.make_tmpdir() with open(os.path.join(datadir, "foo"), "w") as f: f.write("The quick brown fox jumped over the lazy dog") - p = subprocess.Popen([sys.executable, arv_put.__file__, datadir], - stdout=subprocess.PIPE, env=self.ENVIRON) - (arvout, arverr) = p.communicate() - self.assertEqual(arverr, None) + p = subprocess.Popen([sys.executable, arv_put.__file__, + os.path.join(datadir, 'foo')], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex(err.decode(), r'INFO: Collection saved as ') self.assertEqual(p.returncode, 0) # The manifest text stored in the API server under the same @@ -796,6 +845,49 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, self.assertEqual(1, len(collection_list)) return collection_list[0] + def test_expired_token_invalidates_cache(self): + self.authorize_with('active') + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f: + f.write('foo') + # Upload a directory and get the cache file name + p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex(err.decode(), r'INFO: Creating new cache file at ') + self.assertEqual(p.returncode, 0) + cache_filepath = re.search(r'INFO: Creating new cache file at (.*)', + err.decode()).groups()[0] + self.assertTrue(os.path.isfile(cache_filepath)) + # Load the cache file contents and modify the manifest to simulate + # an expired access token + with open(cache_filepath, 'r') as c: + cache = json.load(c) + self.assertRegex(cache['manifest'], r'\+A\S+\@') + a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30) + cache['manifest'] = re.sub( + r'\@.*? ', + "@{} ".format(self.datetime_to_hex(a_month_ago)), + cache['manifest']) + with open(cache_filepath, 'w') as c: + c.write(json.dumps(cache)) + # Re-run the upload and expect to get an invalid cache message + p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.ENVIRON) + (out, err) = p.communicate() + self.assertRegex( + err.decode(), + r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch') + self.assertEqual(p.returncode, 0) + # Confirm that the resulting cache is different from the last run. + with open(cache_filepath, 'r') as c2: + new_cache = json.load(c2) + self.assertNotEqual(cache['manifest'], new_cache['manifest']) + def test_put_collection_with_later_update(self): tmpdir = self.make_tmpdir() with open(os.path.join(tmpdir, 'file1'), 'w') as f: @@ -809,7 +901,40 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, self.assertEqual(col['uuid'], updated_col['uuid']) # Get the manifest and check that the new file is being included c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute() - self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n') + self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n') + + def test_upload_directory_reference_without_trailing_slash(self): + tmpdir1 = self.make_tmpdir() + tmpdir2 = self.make_tmpdir() + with open(os.path.join(tmpdir1, 'foo'), 'w') as f: + f.write('This is foo') + with open(os.path.join(tmpdir2, 'bar'), 'w') as f: + f.write('This is not foo') + # Upload one directory and one file + col = self.run_and_find_collection("", ['--no-progress', + tmpdir1, + os.path.join(tmpdir2, 'bar')]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Check that 'foo' was written inside a subcollection + # OTOH, 'bar' should have been directly uploaded on the root collection + self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n') + + def test_upload_directory_reference_with_trailing_slash(self): + tmpdir1 = self.make_tmpdir() + tmpdir2 = self.make_tmpdir() + with open(os.path.join(tmpdir1, 'foo'), 'w') as f: + f.write('This is foo') + with open(os.path.join(tmpdir2, 'bar'), 'w') as f: + f.write('This is not foo') + # Upload one directory (with trailing slash) and one file + col = self.run_and_find_collection("", ['--no-progress', + tmpdir1 + os.sep, + os.path.join(tmpdir2, 'bar')]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Check that 'foo' and 'bar' were written at the same level + self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n') def test_put_collection_with_high_redundancy(self): # Write empty data: we're not testing CollectionWriter, just @@ -848,6 +973,76 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, '--project-uuid', self.PROJECT_UUID]) self.assertEqual(link_name, collection['name']) + def test_exclude_filename_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', '*2.txt', + '--exclude', 'file3.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # None of the file2.txt & file3.txt should have been uploaded + self.assertRegex(c['manifest_text'], r'^.*:file1.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_exclude_filepath_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', 'subdir/*2.txt', + '--exclude', './file1.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded + self.assertNotRegex(c['manifest_text'], + r'^\./%s.*:file1.txt' % os.path.basename(tmpdir)) + self.assertNotRegex(c['manifest_text'], + r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], + r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_silent_mode_no_errors(self): + self.authorize_with('active') + tmpdir = self.make_tmpdir() + with open(os.path.join(tmpdir, 'test.txt'), 'w') as f: + f.write('hello world') + pipe = subprocess.Popen( + [sys.executable, arv_put.__file__] + ['--silent', tmpdir], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=self.ENVIRON) + stdout, stderr = pipe.communicate() + # No console output should occur on normal operations + self.assertNotRegex(stderr.decode(), r'.+') + self.assertNotRegex(stdout.decode(), r'.+') + + def test_silent_mode_does_not_avoid_error_messages(self): + self.authorize_with('active') + pipe = subprocess.Popen( + [sys.executable, arv_put.__file__] + ['--silent', + '/path/not/existant'], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=self.ENVIRON) + stdout, stderr = pipe.communicate() + # Error message should be displayed when errors happen + self.assertRegex(stderr.decode(), r'.*ERROR:.*') + self.assertNotRegex(stdout.decode(), r'.+') + if __name__ == '__main__': unittest.main()