-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from __future__ import absolute_import
from __future__ import division
from future import standard_library
import threading
import hashlib
import random
+import uuid
import arvados
import arvados.commands.put as arv_put
with open(os.path.join(self.small_files_dir, str(i)), 'w') as f:
f.write(data + str(i))
self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
+ # Temp dir to hold a symlink to other temp dir
+ self.tempdir_with_symlink = tempfile.mkdtemp()
+ os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+ os.symlink(os.path.join(self.tempdir, '1'),
+ os.path.join(self.tempdir_with_symlink, 'linkedfile'))
    def tearDown(self):
        # Remove every fixture created by setUp(); failures here would
        # leak temp dirs between test runs.
        super(ArvPutUploadJobTest, self).tearDown()
        shutil.rmtree(self.tempdir)
        os.unlink(self.large_file_name)
        shutil.rmtree(self.small_files_dir)
+        # New in this patch: also remove the temp dir holding symlinks
+        # ('linkeddir'/'linkedfile') created in setUp().
+        shutil.rmtree(self.tempdir_with_symlink)
+
+    def test_symlinks_are_followed_by_default(self):
+        # Default behavior (no follow_links argument): the upload should
+        # traverse symlinks, so both the linked dir and the linked file
+        # must show up in the manifest.
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
+        cwriter.start(save_collection=False)
+        self.assertIn('linkeddir', cwriter.manifest_text())
+        self.assertIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+    def test_symlinks_are_not_followed_when_requested(self):
+        # With follow_links=False the symlinked entries must be excluded
+        # from the manifest entirely.
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
+                                          follow_links=False)
+        cwriter.start(save_collection=False)
+        self.assertNotIn('linkeddir', cwriter.manifest_text())
+        self.assertNotIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+ def test_passing_nonexistant_path_raise_exception(self):
+ uuid_str = str(uuid.uuid4())
+ with self.assertRaises(arv_put.PathDoesNotExistError):
+ cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
def test_writer_works_without_cache(self):
cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
f.flush()
cwriter = arv_put.ArvPutUploadJob([f.name])
cwriter.start(save_collection=False)
- self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
+ self.assertEqual(0, cwriter.bytes_skipped)
+ self.assertEqual(3, cwriter.bytes_written)
# Don't destroy the cache, and start another upload
cwriter_new = arv_put.ArvPutUploadJob([f.name])
cwriter_new.start(save_collection=False)
cwriter_new.destroy_cache()
- self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_written)
def make_progress_tester(self):
progression = []
for expect_count in (None, 8):
progression, reporter = self.make_progress_tester()
cwriter = arv_put.ArvPutUploadJob([f.name],
- reporter=reporter, bytes_expected=expect_count)
+ reporter=reporter)
+ cwriter.bytes_expected = expect_count
cwriter.start(save_collection=False)
cwriter.destroy_cache()
self.assertIn((3, expect_count), progression)
self.assertGreater(writer.bytes_written, 0)
self.assertLess(writer.bytes_written,
os.path.getsize(self.large_file_name))
- # Retry the upload using dry_run to check if there is a pending upload
- writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadIsPending):
- writer2.start(save_collection=False)
+ # Retry the upload using dry_run to check if there is a pending upload
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Complete the pending upload
writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
replication_desired=1)
writer3.start(save_collection=False)
- # Confirm there's no pending upload with dry_run=True
- writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadNotPending):
- writer4.start(save_collection=False)
- writer4.destroy_cache()
+ # Confirm there's no pending upload with dry_run=True
+ writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Test obvious cases
with self.assertRaises(arv_put.ArvPutUploadIsPending):
arv_put.ArvPutUploadJob([self.large_file_name],
TEST_SIZE = os.path.getsize(__file__)
    def test_expected_bytes_for_file(self):
+        # bytes_expected is now computed by ArvPutUploadJob itself,
+        # replacing the removed module-level expected_bytes_for() helper.
+        writer = arv_put.ArvPutUploadJob([__file__])
        self.assertEqual(self.TEST_SIZE,
-                         arv_put.expected_bytes_for([__file__]))
+                         writer.bytes_expected)
    def test_expected_bytes_for_tree(self):
        tree = self.make_tmpdir()
        shutil.copyfile(__file__, os.path.join(tree, 'one'))
        shutil.copyfile(__file__, os.path.join(tree, 'two'))
+
+        # A tree with two copies of this file should total 2x TEST_SIZE;
+        # bytes_expected comes from the job object now, not from the
+        # removed expected_bytes_for() helper.
+        writer = arv_put.ArvPutUploadJob([tree])
        self.assertEqual(self.TEST_SIZE * 2,
-                         arv_put.expected_bytes_for([tree]))
+                         writer.bytes_expected)
+        # Tree plus the file itself: 3x TEST_SIZE.
+        writer = arv_put.ArvPutUploadJob([tree, __file__])
        self.assertEqual(self.TEST_SIZE * 3,
-                         arv_put.expected_bytes_for([tree, __file__]))
+                         writer.bytes_expected)
    def test_expected_bytes_for_device(self):
-        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
-        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
+        # Device files have no meaningful size, so bytes_expected must be
+        # None — even when a regular file is mixed into the same job.
+        writer = arv_put.ArvPutUploadJob(['/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
class ArvadosPutReportTest(ArvadosBaseTestCase):
datadir = self.make_tmpdir()
with open(os.path.join(datadir, "foo"), "w") as f:
f.write("The quick brown fox jumped over the lazy dog")
- p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
- stdout=subprocess.PIPE, env=self.ENVIRON)
- (arvout, arverr) = p.communicate()
- self.assertEqual(arverr, None)
+ p = subprocess.Popen([sys.executable, arv_put.__file__,
+ os.path.join(datadir, 'foo')],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Collection saved as ')
self.assertEqual(p.returncode, 0)
# The manifest text stored in the API server under the same
self.assertEqual(col['uuid'], updated_col['uuid'])
# Get the manifest and check that the new file is being included
c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
- self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
+ self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+    def test_upload_directory_reference_without_trailing_slash(self):
+        # Without a trailing slash, the directory name itself becomes a
+        # subcollection in the resulting manifest.
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' was written inside a subcollection
+        # OTOH, 'bar' should have been directly uploaded on the root collection
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+    def test_upload_directory_reference_with_trailing_slash(self):
+        # With a trailing slash, the directory's *contents* are uploaded
+        # into the root of the collection (no subcollection wrapper).
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory (with trailing slash) and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1 + os.sep,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' and 'bar' were written at the same level
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
def test_put_collection_with_high_redundancy(self):
# Write empty data: we're not testing CollectionWriter, just