-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from __future__ import absolute_import
from __future__ import division
from future import standard_library
import threading
import hashlib
import random
+import uuid
import arvados
import arvados.commands.put as arv_put
with open(os.path.join(self.small_files_dir, str(i)), 'w') as f:
f.write(data + str(i))
self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
+ # Temp dir to hold a symlink to other temp dir
+ self.tempdir_with_symlink = tempfile.mkdtemp()
+ os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+ os.symlink(os.path.join(self.tempdir, '1'),
+ os.path.join(self.tempdir_with_symlink, 'linkedfile'))
    def tearDown(self):
        # Remove every fixture created by setUp(); failures here would
        # leak temp dirs between test runs.
        super(ArvPutUploadJobTest, self).tearDown()
        shutil.rmtree(self.tempdir)
        os.unlink(self.large_file_name)
        shutil.rmtree(self.small_files_dir)
+        # New in this patch: also remove the temp dir holding symlinks
+        # ('linkeddir'/'linkedfile') created in setUp().
+        shutil.rmtree(self.tempdir_with_symlink)
+
+    def test_symlinks_are_followed_by_default(self):
+        # Default behavior (no follow_links argument): the upload should
+        # traverse symlinks, so both the linked dir and the linked file
+        # must show up in the manifest.
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
+        cwriter.start(save_collection=False)
+        self.assertIn('linkeddir', cwriter.manifest_text())
+        self.assertIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+    def test_symlinks_are_not_followed_when_requested(self):
+        # With follow_links=False the symlinked entries must be excluded
+        # from the manifest entirely.
+        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
+                                          follow_links=False)
+        cwriter.start(save_collection=False)
+        self.assertNotIn('linkeddir', cwriter.manifest_text())
+        self.assertNotIn('linkedfile', cwriter.manifest_text())
+        cwriter.destroy_cache()
+
+ def test_passing_nonexistant_path_raise_exception(self):
+ uuid_str = str(uuid.uuid4())
+ with self.assertRaises(arv_put.PathDoesNotExistError):
+ cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
def test_writer_works_without_cache(self):
cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
f.flush()
cwriter = arv_put.ArvPutUploadJob([f.name])
cwriter.start(save_collection=False)
- self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
+ self.assertEqual(0, cwriter.bytes_skipped)
+ self.assertEqual(3, cwriter.bytes_written)
# Don't destroy the cache, and start another upload
cwriter_new = arv_put.ArvPutUploadJob([f.name])
cwriter_new.start(save_collection=False)
cwriter_new.destroy_cache()
- self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_skipped)
+ self.assertEqual(3, cwriter_new.bytes_written)
def make_progress_tester(self):
progression = []
for expect_count in (None, 8):
progression, reporter = self.make_progress_tester()
cwriter = arv_put.ArvPutUploadJob([f.name],
- reporter=reporter, bytes_expected=expect_count)
+ reporter=reporter)
+ cwriter.bytes_expected = expect_count
cwriter.start(save_collection=False)
cwriter.destroy_cache()
self.assertIn((3, expect_count), progression)
self.assertGreater(writer.bytes_written, 0)
self.assertLess(writer.bytes_written,
os.path.getsize(self.large_file_name))
- # Retry the upload using dry_run to check if there is a pending upload
- writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadIsPending):
- writer2.start(save_collection=False)
+ # Retry the upload using dry_run to check if there is a pending upload
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Complete the pending upload
writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
replication_desired=1)
writer3.start(save_collection=False)
- # Confirm there's no pending upload with dry_run=True
- writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
- replication_desired=1,
- dry_run=True)
with self.assertRaises(arv_put.ArvPutUploadNotPending):
- writer4.start(save_collection=False)
- writer4.destroy_cache()
+ # Confirm there's no pending upload with dry_run=True
+ writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
# Test obvious cases
with self.assertRaises(arv_put.ArvPutUploadIsPending):
arv_put.ArvPutUploadJob([self.large_file_name],
TEST_SIZE = os.path.getsize(__file__)
    def test_expected_bytes_for_file(self):
+        # bytes_expected is now computed by ArvPutUploadJob itself,
+        # replacing the removed module-level expected_bytes_for() helper.
+        writer = arv_put.ArvPutUploadJob([__file__])
        self.assertEqual(self.TEST_SIZE,
-                         arv_put.expected_bytes_for([__file__]))
+                         writer.bytes_expected)
    def test_expected_bytes_for_tree(self):
        tree = self.make_tmpdir()
        shutil.copyfile(__file__, os.path.join(tree, 'one'))
        shutil.copyfile(__file__, os.path.join(tree, 'two'))
+
+        # A tree with two copies of this file should total 2x TEST_SIZE;
+        # bytes_expected comes from the job object now, not from the
+        # removed expected_bytes_for() helper.
+        writer = arv_put.ArvPutUploadJob([tree])
        self.assertEqual(self.TEST_SIZE * 2,
-                         arv_put.expected_bytes_for([tree]))
+                         writer.bytes_expected)
+        # Tree plus the file itself: 3x TEST_SIZE.
+        writer = arv_put.ArvPutUploadJob([tree, __file__])
        self.assertEqual(self.TEST_SIZE * 3,
-                         arv_put.expected_bytes_for([tree, __file__]))
+                         writer.bytes_expected)
    def test_expected_bytes_for_device(self):
-        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
-        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
+        # Device files have no meaningful size, so bytes_expected must be
+        # None — even when a regular file is mixed into the same job.
+        writer = arv_put.ArvPutUploadJob(['/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
class ArvadosPutReportTest(ArvadosBaseTestCase):
datadir = self.make_tmpdir()
with open(os.path.join(datadir, "foo"), "w") as f:
f.write("The quick brown fox jumped over the lazy dog")
- p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
- stdout=subprocess.PIPE, env=self.ENVIRON)
- (arvout, arverr) = p.communicate()
- self.assertEqual(arverr, None)
+ p = subprocess.Popen([sys.executable, arv_put.__file__,
+ os.path.join(datadir, 'foo')],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Collection saved as ')
self.assertEqual(p.returncode, 0)
# The manifest text stored in the API server under the same
self.assertEqual(col['uuid'], updated_col['uuid'])
# Get the manifest and check that the new file is being included
c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
- self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
+ self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+    def test_upload_directory_reference_without_trailing_slash(self):
+        # Without a trailing slash, the directory name itself becomes a
+        # subcollection in the resulting manifest.
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' was written inside a subcollection
+        # OTOH, 'bar' should have been directly uploaded on the root collection
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+    def test_upload_directory_reference_with_trailing_slash(self):
+        # With a trailing slash, the directory's *contents* are uploaded
+        # into the root of the collection (no subcollection wrapper).
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory (with trailing slash) and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1 + os.sep,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' and 'bar' were written at the same level
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
def test_put_collection_with_high_redundancy(self):
# Write empty data: we're not testing CollectionWriter, just