Merge branch 'master' into 11060-all-visible-repos
[arvados.git] / sdk / python / tests / test_arv_put.py
index a7384030c9f02785e666811cc713396ec2f5a036..c2eaf12bbb048f24239839da2322fd0be69815c6 100644 (file)
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
 from __future__ import absolute_import
 from __future__ import division
 from future import standard_library
@@ -22,6 +19,7 @@ import yaml
 import threading
 import hashlib
 import random
+import uuid
 
 import arvados
 import arvados.commands.put as arv_put
@@ -271,12 +269,39 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             with open(os.path.join(self.small_files_dir, str(i)), 'w') as f:
                 f.write(data + str(i))
         self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
+        # Temp dir to hold a symlink to other temp dir
+        self.tempdir_with_symlink = tempfile.mkdtemp()
+        os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+        os.symlink(os.path.join(self.tempdir, '1'),
+                   os.path.join(self.tempdir_with_symlink, 'linkedfile'))
 
     def tearDown(self):
         super(ArvPutUploadJobTest, self).tearDown()
         shutil.rmtree(self.tempdir)
         os.unlink(self.large_file_name)
         shutil.rmtree(self.small_files_dir)
+        shutil.rmtree(self.tempdir_with_symlink)
+
+    def test_symlinks_are_followed_by_default(self):
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
+        cwriter.start(save_collection=False)
+        self.assertIn('linkeddir', cwriter.manifest_text())
+        self.assertIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+    def test_symlinks_are_not_followed_when_requested(self):
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
+                                          follow_links=False)
+        cwriter.start(save_collection=False)
+        self.assertNotIn('linkeddir', cwriter.manifest_text())
+        self.assertNotIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+    def test_passing_nonexistant_path_raise_exception(self):
+        uuid_str = str(uuid.uuid4())
+        cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
+        with self.assertRaises(arv_put.PathDoesNotExistError):
+            cwriter.start(save_collection=False)
 
     def test_writer_works_without_cache(self):
         cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
@@ -289,12 +314,14 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             f.flush()
             cwriter = arv_put.ArvPutUploadJob([f.name])
             cwriter.start(save_collection=False)
-            self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
+            self.assertEqual(0, cwriter.bytes_skipped)
+            self.assertEqual(3, cwriter.bytes_written)
             # Don't destroy the cache, and start another upload
             cwriter_new = arv_put.ArvPutUploadJob([f.name])
             cwriter_new.start(save_collection=False)
             cwriter_new.destroy_cache()
-            self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped)
+            self.assertEqual(3, cwriter_new.bytes_skipped)
+            self.assertEqual(3, cwriter_new.bytes_written)
 
     def make_progress_tester(self):
         progression = []
@@ -764,16 +791,19 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         datadir = self.make_tmpdir()
         with open(os.path.join(datadir, "foo"), "w") as f:
             f.write("The quick brown fox jumped over the lazy dog")
-        p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
-                             stdout=subprocess.PIPE, env=self.ENVIRON)
-        (arvout, arverr) = p.communicate()
-        self.assertEqual(arverr, None)
+        p = subprocess.Popen([sys.executable, arv_put.__file__,
+                              os.path.join(datadir, 'foo')],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(err.decode(), r'INFO: Collection saved as ')
         self.assertEqual(p.returncode, 0)
 
         # The manifest text stored in the API server under the same
         # manifest UUID must use signed locators.
         c = arv_put.api_client.collections().get(uuid=manifest_uuid).execute()
-        self.assertRegexpMatches(
+        self.assertRegex(
             c['manifest_text'],
             r'^\. 08a008a01d498c404b0c30852b39d3b8\+44\+A[0-9a-f]+@[0-9a-f]+ 0:44:foo\n')
 
@@ -787,6 +817,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.PIPE, env=self.ENVIRON)
         stdout, stderr = pipe.communicate(text.encode())
+        self.assertRegex(stderr.decode(), r'INFO: Collection (updated:|saved as)')
         search_key = ('portable_data_hash'
                       if '--portable-data-hash' in extra_args else 'uuid')
         collection_list = arvados.api('v1').collections().list(
@@ -808,7 +839,40 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         self.assertEqual(col['uuid'], updated_col['uuid'])
         # Get the manifest and check that the new file is being included
         c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
-        self.assertRegexpMatches(c['manifest_text'], r'^\. .*:44:file2\n')
+        self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+    def test_upload_directory_reference_without_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' was written inside a subcollection
+        # OTOH, 'bar' should have been directly uploaded on the root collection
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+    def test_upload_directory_reference_with_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory (with trailing slash) and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1 + os.sep,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' and 'bar' were written at the same level
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
 
     def test_put_collection_with_high_redundancy(self):
         # Write empty data: we're not testing CollectionWriter, just
@@ -826,7 +890,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
             "Test unnamed collection",
             ['--portable-data-hash', '--project-uuid', self.PROJECT_UUID])
         username = pwd.getpwuid(os.getuid()).pw_name
-        self.assertRegexpMatches(
+        self.assertRegex(
             link['name'],
             r'^Saved at .* by {}@'.format(re.escape(username)))