+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
from __future__ import absolute_import
from __future__ import division
from future import standard_library
from builtins import str
from builtins import range
import apiclient
+import datetime
+import hashlib
+import json
import mock
import os
import pwd
+import random
import re
import shutil
import subprocess
import sys
import tempfile
+import threading
import time
import unittest
-import yaml
-import threading
-import hashlib
-import random
import uuid
+import yaml
import arvados
import arvados.commands.put as arv_put
self.last_cache.close()
resume_cache = arv_put.ResumeCache(self.last_cache.filename)
self.assertNotEqual(None, resume_cache)
- self.assertRaises(None, resume_cache.check_cache())
+ resume_cache.check_cache()
def test_basic_cache_storage(self):
thing = ['test', 'list']
def test_passing_nonexistant_path_raise_exception(self):
uuid_str = str(uuid.uuid4())
- cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
with self.assertRaises(arv_put.PathDoesNotExistError):
- cwriter.start(save_collection=False)
+ cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
def test_writer_works_without_cache(self):
cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
for expect_count in (None, 8):
progression, reporter = self.make_progress_tester()
cwriter = arv_put.ArvPutUploadJob([f.name],
- reporter=reporter, bytes_expected=expect_count)
+ reporter=reporter)
+ cwriter.bytes_expected = expect_count
cwriter.start(save_collection=False)
cwriter.destroy_cache()
self.assertIn((3, expect_count), progression)
self.assertGreater(writer.bytes_written, 0)
self.assertLess(writer.bytes_written,
os.path.getsize(self.large_file_name))
- # Retry the upload using dry_run to check if there is a pending upload
- writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadIsPending):
- writer2.start(save_collection=False)
+ # Retry the upload using dry_run to check if there is a pending upload
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Complete the pending upload
writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
replication_desired=1)
writer3.start(save_collection=False)
- # Confirm there's no pending upload with dry_run=True
- writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadNotPending):
- writer4.start(save_collection=False)
- writer4.destroy_cache()
+ # Confirm there's no pending upload with dry_run=True
+ writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Test obvious cases
with self.assertRaises(arv_put.ArvPutUploadIsPending):
arv_put.ArvPutUploadJob([self.large_file_name],
TEST_SIZE = os.path.getsize(__file__)
def test_expected_bytes_for_file(self):
+ writer = arv_put.ArvPutUploadJob([__file__])
self.assertEqual(self.TEST_SIZE,
- arv_put.expected_bytes_for([__file__]))
+ writer.bytes_expected)
def test_expected_bytes_for_tree(self):
tree = self.make_tmpdir()
shutil.copyfile(__file__, os.path.join(tree, 'one'))
shutil.copyfile(__file__, os.path.join(tree, 'two'))
+
+ writer = arv_put.ArvPutUploadJob([tree])
self.assertEqual(self.TEST_SIZE * 2,
- arv_put.expected_bytes_for([tree]))
+ writer.bytes_expected)
+ writer = arv_put.ArvPutUploadJob([tree, __file__])
self.assertEqual(self.TEST_SIZE * 3,
- arv_put.expected_bytes_for([tree, __file__]))
+ writer.bytes_expected)
def test_expected_bytes_for_device(self):
- self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
- self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
+ writer = arv_put.ArvPutUploadJob(['/dev/null'])
+ self.assertIsNone(writer.bytes_expected)
+ writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
+ self.assertIsNone(writer.bytes_expected)
class ArvadosPutReportTest(ArvadosBaseTestCase):
self.call_main_with_args,
['--project-uuid', self.Z_UUID, '--stream'])
+ def test_error_when_excluding_absolute_path(self):
+ tmpdir = self.make_tmpdir()
+ self.assertRaises(SystemExit,
+ self.call_main_with_args,
+ ['--exclude', '/some/absolute/path/*',
+ tmpdir])
+
def test_api_error_handling(self):
coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()')
coll_save_mock.side_effect = arvados.errors.ApiError(
cls.ENVIRON = os.environ.copy()
cls.ENVIRON['PYTHONPATH'] = ':'.join(sys.path)
+ def datetime_to_hex(self, dt):
+ return hex(int(time.mktime(dt.timetuple())))[2:]
+
def setUp(self):
super(ArvPutIntegrationTest, self).setUp()
arv_put.api_client = None
self.assertEqual(1, len(collection_list))
return collection_list[0]
+ def test_expired_token_invalidates_cache(self):
+ self.authorize_with('active')
+ tmpdir = self.make_tmpdir()
+ with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
+ f.write('foo')
+ # Upload a directory and get the cache file name
+ p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+ self.assertEqual(p.returncode, 0)
+ cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+ err.decode()).groups()[0]
+ self.assertTrue(os.path.isfile(cache_filepath))
+ # Load the cache file contents and modify the manifest to simulate
+ # an expired access token
+ with open(cache_filepath, 'r') as c:
+ cache = json.load(c)
+ self.assertRegex(cache['manifest'], r'\+A\S+\@')
+ a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
+ cache['manifest'] = re.sub(
+ r'\@.*? ',
+ "@{} ".format(self.datetime_to_hex(a_month_ago)),
+ cache['manifest'])
+ with open(cache_filepath, 'w') as c:
+ c.write(json.dumps(cache))
+ # Re-run the upload and expect to get an invalid cache message
+ p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(
+ err.decode(),
+ r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch')
+ self.assertEqual(p.returncode, 0)
+ # Confirm that the resulting cache is different from the last run.
+ with open(cache_filepath, 'r') as c2:
+ new_cache = json.load(c2)
+ self.assertNotEqual(cache['manifest'], new_cache['manifest'])
+
def test_put_collection_with_later_update(self):
tmpdir = self.make_tmpdir()
with open(os.path.join(tmpdir, 'file1'), 'w') as f:
'--project-uuid', self.PROJECT_UUID])
self.assertEqual(link_name, collection['name'])
+ def test_exclude_filename_pattern(self):
+ tmpdir = self.make_tmpdir()
+ tmpsubdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(tmpsubdir)
+ for fname in ['file1', 'file2', 'file3']:
+ with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
+ f.write("This is %s" % fname)
+ with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
+ f.write("This is %s" % fname)
+ col = self.run_and_find_collection("", ['--no-progress',
+ '--exclude', '*2.txt',
+ '--exclude', 'file3.*',
+ tmpdir])
+ self.assertNotEqual(None, col['uuid'])
+ c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+ # None of the file2.txt & file3.txt should have been uploaded
+ self.assertRegex(c['manifest_text'], r'^.*:file1.txt')
+ self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt')
+ self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt')
+
+ def test_exclude_filepath_pattern(self):
+ tmpdir = self.make_tmpdir()
+ tmpsubdir = os.path.join(tmpdir, 'subdir')
+ os.mkdir(tmpsubdir)
+ for fname in ['file1', 'file2', 'file3']:
+ with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
+ f.write("This is %s" % fname)
+ with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
+ f.write("This is %s" % fname)
+ col = self.run_and_find_collection("", ['--no-progress',
+ '--exclude', 'subdir/*2.txt',
+ '--exclude', './file1.*',
+ tmpdir])
+ self.assertNotEqual(None, col['uuid'])
+ c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+ # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded
+ self.assertNotRegex(c['manifest_text'],
+ r'^\./%s.*:file1.txt' % os.path.basename(tmpdir))
+ self.assertNotRegex(c['manifest_text'],
+ r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir))
+ self.assertRegex(c['manifest_text'],
+ r'^\./%s.*:file2.txt' % os.path.basename(tmpdir))
+ self.assertRegex(c['manifest_text'], r'^.*:file3.txt')
+
+ def test_silent_mode_no_errors(self):
+ self.authorize_with('active')
+ tmpdir = self.make_tmpdir()
+ with open(os.path.join(tmpdir, 'test.txt'), 'w') as f:
+ f.write('hello world')
+ pipe = subprocess.Popen(
+ [sys.executable, arv_put.__file__] + ['--silent', tmpdir],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, env=self.ENVIRON)
+ stdout, stderr = pipe.communicate()
+ # No console output should occur on normal operations
+ self.assertNotRegex(stderr.decode(), r'.+')
+ self.assertNotRegex(stdout.decode(), r'.+')
+
+ def test_silent_mode_does_not_avoid_error_messages(self):
+ self.authorize_with('active')
+ pipe = subprocess.Popen(
+ [sys.executable, arv_put.__file__] + ['--silent',
+ '/path/not/existant'],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, env=self.ENVIRON)
+ stdout, stderr = pipe.communicate()
+ # Error message should be displayed when errors happen
+ self.assertRegex(stderr.decode(), r'.*ERROR:.*')
+ self.assertNotRegex(stdout.decode(), r'.+')
+
if __name__ == '__main__':
unittest.main()