-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
from __future__ import absolute_import
from __future__ import division
import threading
import hashlib
import random
+import uuid
import arvados
import arvados.commands.put as arv_put
with open(os.path.join(self.small_files_dir, str(i)), 'w') as f:
f.write(data + str(i))
self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
+ # Temp dir to hold a symlink to other temp dir
+ self.tempdir_with_symlink = tempfile.mkdtemp()
+ os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+ os.symlink(os.path.join(self.tempdir, '1'),
+ os.path.join(self.tempdir_with_symlink, 'linkedfile'))
def tearDown(self):
super(ArvPutUploadJobTest, self).tearDown()
shutil.rmtree(self.tempdir)
os.unlink(self.large_file_name)
shutil.rmtree(self.small_files_dir)
+ shutil.rmtree(self.tempdir_with_symlink)
+
+ def test_symlinks_are_followed_by_default(self):
+ cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
+ cwriter.start(save_collection=False)
+ self.assertIn('linkeddir', cwriter.manifest_text())
+ self.assertIn('linkedfile', cwriter.manifest_text())
+ cwriter.destroy_cache()
+
+ def test_symlinks_are_not_followed_when_requested(self):
+ cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
+ follow_links=False)
+ cwriter.start(save_collection=False)
+ self.assertNotIn('linkeddir', cwriter.manifest_text())
+ self.assertNotIn('linkedfile', cwriter.manifest_text())
+ cwriter.destroy_cache()
+
+ def test_passing_nonexistant_path_raise_exception(self):
+ uuid_str = str(uuid.uuid4())
+ cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
+ with self.assertRaises(arv_put.PathDoesNotExistError):
+ cwriter.start(save_collection=False)
def test_writer_works_without_cache(self):
cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
f.flush()
cwriter = arv_put.ArvPutUploadJob([f.name])
cwriter.start(save_collection=False)
- self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
+ self.assertEqual(0, cwriter.bytes_skipped)
+ self.assertEqual(3, cwriter.bytes_written)
# Don't destroy the cache, and start another upload
cwriter_new = arv_put.ArvPutUploadJob([f.name])
cwriter_new.start(save_collection=False)
cwriter_new.destroy_cache()
- self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_written)
def make_progress_tester(self):
progression = []
datadir = self.make_tmpdir()
with open(os.path.join(datadir, "foo"), "w") as f:
f.write("The quick brown fox jumped over the lazy dog")
- p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
- stdout=subprocess.PIPE, env=self.ENVIRON)
- (arvout, arverr) = p.communicate()
- self.assertEqual(arverr, None)
+ p = subprocess.Popen([sys.executable, arv_put.__file__,
+ os.path.join(datadir, 'foo')],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Collection saved as ')
self.assertEqual(p.returncode, 0)
# The manifest text stored in the API server under the same
self.assertEqual(col['uuid'], updated_col['uuid'])
# Get the manifest and check that the new file is being included
c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
- self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
+ self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+ def test_upload_directory_reference_without_trailing_slash(self):
+ tmpdir1 = self.make_tmpdir()
+ tmpdir2 = self.make_tmpdir()
+ with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+ f.write('This is foo')
+ with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+ f.write('This is not foo')
+ # Upload one directory and one file
+ col = self.run_and_find_collection("", ['--no-progress',
+ tmpdir1,
+ os.path.join(tmpdir2, 'bar')])
+ self.assertNotEqual(None, col['uuid'])
+ c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+ # Check that 'foo' was written inside a subcollection
+ # OTOH, 'bar' should have been directly uploaded on the root collection
+ self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+ def test_upload_directory_reference_with_trailing_slash(self):
+ tmpdir1 = self.make_tmpdir()
+ tmpdir2 = self.make_tmpdir()
+ with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+ f.write('This is foo')
+ with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+ f.write('This is not foo')
+ # Upload one directory (with trailing slash) and one file
+ col = self.run_and_find_collection("", ['--no-progress',
+ tmpdir1 + os.sep,
+ os.path.join(tmpdir2, 'bar')])
+ self.assertNotEqual(None, col['uuid'])
+ c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+ # Check that 'foo' and 'bar' were written at the same level
+ self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
def test_put_collection_with_high_redundancy(self):
# Write empty data: we're not testing CollectionWriter, just