11789: Merge branch 'master' into 11789-arvput-exclude-flag
[arvados.git] / sdk / python / tests / test_arv_put.py
index 6d103526f40ebc1b8de64cdf2956f1867cdcf538..802dbdb22ba6e554f31ea1a28168ec339763454c 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import
 from __future__ import division
 from future import standard_library
@@ -299,9 +303,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
 
     def test_passing_nonexistant_path_raise_exception(self):
         uuid_str = str(uuid.uuid4())
-        cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
         with self.assertRaises(arv_put.PathDoesNotExistError):
-            cwriter.start(save_collection=False)
+            cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
 
     def test_writer_works_without_cache(self):
         cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
@@ -336,7 +339,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             for expect_count in (None, 8):
                 progression, reporter = self.make_progress_tester()
                 cwriter = arv_put.ArvPutUploadJob([f.name],
-                    reporter=reporter, bytes_expected=expect_count)
+                                                  reporter=reporter)
+                cwriter.bytes_expected = expect_count
                 cwriter.start(save_collection=False)
                 cwriter.destroy_cache()
                 self.assertIn((3, expect_count), progression)
@@ -492,23 +496,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             self.assertGreater(writer.bytes_written, 0)
             self.assertLess(writer.bytes_written,
                             os.path.getsize(self.large_file_name))
-        # Retry the upload using dry_run to check if there is a pending upload
-        writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
-                                          replication_desired=1,
-                                          dry_run=True)
         with self.assertRaises(arv_put.ArvPutUploadIsPending):
-            writer2.start(save_collection=False)
+            # Retry the upload using dry_run to check if there is a pending upload
+            writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+                                              replication_desired=1,
+                                              dry_run=True)
         # Complete the pending upload
         writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
                                           replication_desired=1)
         writer3.start(save_collection=False)
-        # Confirm there's no pending upload with dry_run=True
-        writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
-                                          replication_desired=1,
-                                          dry_run=True)
         with self.assertRaises(arv_put.ArvPutUploadNotPending):
-            writer4.start(save_collection=False)
-        writer4.destroy_cache()
+            # Confirm there's no pending upload with dry_run=True
+            writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+                                              replication_desired=1,
+                                              dry_run=True)
         # Test obvious cases
         with self.assertRaises(arv_put.ArvPutUploadIsPending):
             arv_put.ArvPutUploadJob([self.large_file_name],
@@ -527,21 +528,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
     TEST_SIZE = os.path.getsize(__file__)
 
     def test_expected_bytes_for_file(self):
+        writer = arv_put.ArvPutUploadJob([__file__])
         self.assertEqual(self.TEST_SIZE,
-                          arv_put.expected_bytes_for([__file__]))
+                         writer.bytes_expected)
 
     def test_expected_bytes_for_tree(self):
         tree = self.make_tmpdir()
         shutil.copyfile(__file__, os.path.join(tree, 'one'))
         shutil.copyfile(__file__, os.path.join(tree, 'two'))
+
+        writer = arv_put.ArvPutUploadJob([tree])
         self.assertEqual(self.TEST_SIZE * 2,
-                          arv_put.expected_bytes_for([tree]))
+                         writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([tree, __file__])
         self.assertEqual(self.TEST_SIZE * 3,
-                          arv_put.expected_bytes_for([tree, __file__]))
+                         writer.bytes_expected)
 
     def test_expected_bytes_for_device(self):
-        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
-        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
+        writer = arv_put.ArvPutUploadJob(['/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
 
 
 class ArvadosPutReportTest(ArvadosBaseTestCase):
@@ -669,6 +676,13 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers,
                           self.call_main_with_args,
                           ['--project-uuid', self.Z_UUID, '--stream'])
 
+    def test_error_when_excluding_absolute_path(self):
+        tmpdir = self.make_tmpdir()
+        self.assertRaises(SystemExit,
+                          self.call_main_with_args,
+                          ['--exclude', '/some/absolute/path/*',
+                           tmpdir])
+
     def test_api_error_handling(self):
         coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()')
         coll_save_mock.side_effect = arvados.errors.ApiError(
@@ -791,7 +805,8 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         datadir = self.make_tmpdir()
         with open(os.path.join(datadir, "foo"), "w") as f:
             f.write("The quick brown fox jumped over the lazy dog")
-        p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
+        p = subprocess.Popen([sys.executable, arv_put.__file__,
+                              os.path.join(datadir, 'foo')],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              env=self.ENVIRON)
@@ -838,7 +853,40 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         self.assertEqual(col['uuid'], updated_col['uuid'])
         # Get the manifest and check that the new file is being included
         c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
-        self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
+        self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+    def test_upload_directory_reference_without_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' was written inside a subcollection, while
+        # 'bar' should have been uploaded directly to the root collection
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+    def test_upload_directory_reference_with_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory (with trailing slash) and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1 + os.sep,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' and 'bar' were written at the same level
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
 
     def test_put_collection_with_high_redundancy(self):
         # Write empty data: we're not testing CollectionWriter, just
@@ -877,6 +925,48 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                                        '--project-uuid', self.PROJECT_UUID])
         self.assertEqual(link_name, collection['name'])
 
+    def test_exclude_filename_pattern(self):
+        tmpdir = self.make_tmpdir()
+        tmpsubdir = os.path.join(tmpdir, 'subdir')
+        os.mkdir(tmpsubdir)
+        for fname in ['file1', 'file2', 'file3']:
+            with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
+                f.write("This is %s" % fname)
+            with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
+                f.write("This is %s" % fname)
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                '--exclude', '*2.txt',
+                                                '--exclude', 'file3.*',
+                                                tmpdir])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Neither file2.txt nor file3.txt should have been uploaded
+        self.assertRegex(c['manifest_text'], r'^.*:file1.txt')
+        self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt')
+        self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt')
+
+    def test_exclude_filepath_pattern(self):
+        tmpdir = self.make_tmpdir()
+        tmpsubdir = os.path.join(tmpdir, 'subdir')
+        os.mkdir(tmpsubdir)
+        for fname in ['file1', 'file2', 'file3']:
+            with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f:
+                f.write("This is %s" % fname)
+            with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f:
+                f.write("This is %s" % fname)
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                '--exclude', 'subdir/*2.txt',
+                                                tmpdir])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Only tmpdir/file2.txt should have been uploaded, not subdir/file2.txt
+        self.assertRegex(c['manifest_text'], r'^.*:file1.txt')
+        self.assertRegex(c['manifest_text'],
+                         r'^\./%s.*:file2.txt' % os.path.basename(tmpdir))
+        self.assertNotRegex(c['manifest_text'],
+                            r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir))
+        self.assertRegex(c['manifest_text'], r'^.*:file3.txt')
+
 
 if __name__ == '__main__':
     unittest.main()