Merge branch '18947-githttpd'
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
index c1e24f0e3bafd008334421f7fb9af5c7e326d4e6..98bc98abd4cb2a9686f0e21d0bf514683bcde74a 100644 (file)
@@ -1,3 +1,11 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+from __future__ import absolute_import
+from future.utils import viewitems
+from builtins import str
+from builtins import range
 import arvados
 import arvados_fuse as fuse
 import llfuse
@@ -7,14 +15,19 @@ import sys
 import unittest
 from .. import run_test_server
 from ..mount_test_base import MountTestBase
+from ..slow_test import slow_test
 
 logger = logging.getLogger('arvados.arv-mount')
 
-from performance_profiler import profiled
+from .performance_profiler import profiled
 
-def fuse_CreateCollection(mounttmp, streams=1, files_per_stream=1, data='x'):
+def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
     class Test(unittest.TestCase):
         def runTest(self):
+            self.createCollectionWithMultipleBlocks()
+
+        @profiled
+        def createCollectionWithMultipleBlocks(self):
             for i in range(0, streams):
                 os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
 
@@ -25,9 +38,13 @@ def fuse_CreateCollection(mounttmp, streams=1, files_per_stream=1, data='x'):
 
     Test().runTest()
 
-def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
     class Test(unittest.TestCase):
         def runTest(self):
+            self.readContentsFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def readContentsFromCollectionWithMultipleBlocks(self):
             for i in range(0, streams):
                 d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
                 for j in range(0, files_per_stream):
@@ -36,9 +53,13 @@ def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_
 
     Test().runTest()
 
-def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
     class Test(unittest.TestCase):
         def runTest(self):
+            self.moveFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def moveFileFromCollectionWithMultipleBlocks(self):
             d1 = llfuse.listdir(os.path.join(mounttmp, stream))
             self.assertIn(filename, d1)
 
@@ -52,9 +73,13 @@ def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
 
     Test().runTest()
 
-def fuse_DeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
     class Test(unittest.TestCase):
         def runTest(self):
+            self.deleteFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def deleteFileFromCollectionWithMultipleBlocks(self):
             os.remove(os.path.join(mounttmp, stream, filename))
 
     Test().runTest()
@@ -64,24 +89,7 @@ class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
     def setUp(self):
         super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()
 
-    @profiled
-    def createCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
-        self.pool.apply(fuse_CreateCollection, (self.mounttmp, streams, files_per_stream, data,))
-
-    @profiled
-    def readContentsOfCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
-        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
-
-    @profiled
-    def moveFileFromCollectionWithMultipleBlocks(self, streams):
-        for i in range(0, streams):
-            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
-
-    @profiled
-    def removeFileFromCollectionWithMultipleBlocks(self, streams):
-        for i in range(0, streams):
-            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
-
+    @slow_test
     def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
         collection = arvados.collection.Collection(api_client=self.api)
         collection.save_new()
@@ -98,7 +106,7 @@ class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
 
         data = 'x' * blocks_per_file * bytes_per_block
 
-        self.createCollectionWithMultipleBlocks(streams, files_per_stream, data)
+        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -109,10 +117,11 @@ class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
             self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
 
         # Read file contents
-        self.readContentsOfCollectionWithMultipleBlocks(streams, files_per_stream, data)
+        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
 
         # Move file0.txt out of the streams into .
-        self.moveFileFromCollectionWithMultipleBlocks(streams)
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -130,7 +139,8 @@ class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
                 self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
 
         # Delete 'file1.txt' from all the streams
-        self.removeFileFromCollectionWithMultipleBlocks(streams)
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -146,29 +156,76 @@ class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
             for j in range(2, files_per_stream):
                 self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
 
-# Create a collection with two streams, each with 200 files
-class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
-    def setUp(self):
-        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
 
-    @profiled
-    def createCollectionWithManyFiles(self, streams, files_per_stream, data):
-        self.pool.apply(fuse_CreateCollection, (self.mounttmp, streams, files_per_stream, data,))
+def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.createCollectionWithManyFiles()
 
-    @profiled
-    def readContentsOfCollectionWithManyFiles(self, streams, files_per_stream, data):
-        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+        @profiled
+        def createCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
 
-    @profiled
-    def moveFileFromCollectionWithManyFiles(self, streams):
-        for i in range(0, streams):
-            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+                # Create files
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+                        f.write(data)
 
-    @profiled
-    def removeFileFromCollectionWithManyFiles(self, streams):
-        for i in range(0, streams):
-            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+    Test().runTest()
+
+def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.readContentsFromCollectionWithManyFiles()
 
+        @profiled
+        def readContentsFromCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+                        self.assertEqual(data, f.read())
+
+    Test().runTest()
+
+def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.moveFileFromCollectionWithManyFiles()
+
+        @profiled
+        def moveFileFromCollectionWithManyFiles(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertIn(filename, d1)
+
+            os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertNotIn(filename, d1)
+
+    Test().runTest()
+
+def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.deleteFileFromCollectionWithManyFiles()
+
+        @profiled
+        def deleteFileFromCollectionWithManyFiles(self):
+            os.remove(os.path.join(mounttmp, stream, filename))
+
+    Test().runTest()
+
+# Create a collection with two streams, each with 200 files
+class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+    @slow_test
     def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
         collection = arvados.collection.Collection(api_client=self.api)
         collection.save_new()
@@ -180,10 +237,9 @@ class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
 
         streams = 2
         files_per_stream = 200
-
         data = 'x'
 
-        self.createCollectionWithManyFiles(streams, files_per_stream, data)
+        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -194,10 +250,11 @@ class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
             self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
 
         # Read file contents
-        self.readContentsOfCollectionWithManyFiles(streams, files_per_stream, data)
+        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
 
         # Move file0.txt out of the streams into .
-        self.moveFileFromCollectionWithManyFiles(streams)
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -215,7 +272,8 @@ class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
                 self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
 
         # Delete 'file1.txt' from all the streams
-        self.removeFileFromCollectionWithManyFiles(streams)
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
 
@@ -231,10 +289,14 @@ class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
             for j in range(2, files_per_stream):
                 self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
 
+
 def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
     class Test(unittest.TestCase):
         def runTest(self):
-            #os.rename(os.path.join(mounttmp, collection1, stream, filename), os.path.join(mounttmp, collection2, stream, filename))
+            self.magicDirTest_moveFileFromCollection()
+
+        @profiled
+        def magicDirTest_moveFileFromCollection(self):
             os.rename(os.path.join(mounttmp, collection1, filename), os.path.join(mounttmp, collection2, filename))
 
     Test().runTest()
@@ -242,6 +304,10 @@ def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stre
 def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
     class Test(unittest.TestCase):
         def runTest(self):
+            self.magicDirTest_removeFileFromCollection()
+
+        @profiled
+        def magicDirTest_removeFileFromCollection(self):
             os.remove(os.path.join(mounttmp, collection1, filename))
 
     Test().runTest()
@@ -267,24 +333,12 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestB
         files = {}
         for j in range(0, files_per_stream):
             files[os.path.join(self.mounttmp, collection, 'file'+str(j)+'.txt')] = data
-            #files[os.path.join(self.mounttmp, collection, 'stream'+str(i)+'/file'+str(j)+'.txt')] = data
 
-        for k, v in files.items():
+        for k, v in viewitems(files):
             with open(os.path.join(self.mounttmp, collection, k)) as f:
                 self.assertEqual(v, f.read())
 
-    @profiled
-    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
-        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
-              to_collection.manifest_locator(), 'stream0', 'file0.txt',))
-        from_collection.update()
-        to_collection.update()
-
-    @profiled
-    def magicDirTest_removeFileFromCollection(self, collection):
-        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file1.txt',))
-        collection.update()
-
+    @slow_test
     def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
         streams = 2
         files_per_stream = 200
@@ -300,18 +354,31 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestB
         self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)
 
         # Move file0.txt out of the collection2 into collection1
-        self.magicDirTest_moveFileFromCollection(collection2, collection1)
+        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
+              collection1.manifest_locator(), 'stream0', 'file0.txt',))
         updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
         self.assertFalse('file0.txt' in updated_collection['manifest_text'])
         self.assertTrue('file1.txt' in updated_collection['manifest_text'])
 
         # Delete file1.txt from collection2
-        self.magicDirTest_removeFileFromCollection(collection2)
+        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
         updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
         self.assertFalse('file1.txt' in updated_collection['manifest_text'])
         self.assertTrue('file2.txt' in updated_collection['manifest_text'])
 
 
+def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_moveAllFilesFromCollection()
+
+        @profiled
+        def magicDirTest_moveAllFilesFromCollection(self):
+            for j in range(0, files_per_stream):
+                os.rename(os.path.join(mounttmp, from_collection, 'file'+str(j)+'.txt'), os.path.join(mounttmp, to_collection, 'file'+str(j)+'.txt'))
+
+    Test().runTest()
+
 class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
     def setUp(self):
         super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()
@@ -327,14 +394,7 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(Moun
         collection.save_new()
         return collection
 
-    @profiled
-    def magicDirTestMoveAllFiles_moveFilesFromCollection(self, from_collection, to_collection, files_per_stream):
-        for j in range(0, files_per_stream):
-            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
-                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
-        from_collection.update()
-        to_collection.update()
-
+    @slow_test
     def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
         streams = 2
         files_per_stream = 200
@@ -348,16 +408,17 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(Moun
         self.make_mount(fuse.MagicDirectory)
 
         # Move all files from collection2 into collection1
-        self.magicDirTestMoveAllFiles_moveFilesFromCollection(collection2, collection1, files_per_stream)
+        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
+                  collection1.manifest_locator(), 'stream0', files_per_stream,))
 
         updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
         file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
         for name in file_names:
-          self.assertFalse(name in updated_collection['manifest_text'])
+            self.assertFalse(name in updated_collection['manifest_text'])
 
         updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
         for name in file_names:
-          self.assertTrue(name in updated_collection['manifest_text'])
+            self.assertTrue(name in updated_collection['manifest_text'])
 
 
 # Move one file at a time from one collection into another
@@ -375,14 +436,12 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(Moun
         collection.save_new()
         return collection
 
-    @profiled
     def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
         for j in range(0, files_per_stream):
             self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                   to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
-            from_collection.update()
-            to_collection.update()
 
+    @slow_test
     def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
         streams = 2
         files_per_stream = 200
@@ -401,15 +460,21 @@ class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(Moun
         updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
         file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
         for name in file_names:
-          self.assertFalse(name in updated_collection['manifest_text'])
+            self.assertFalse(name in updated_collection['manifest_text'])
 
         updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
         for name in file_names:
-          self.assertTrue(name in updated_collection['manifest_text'])
+            self.assertTrue(name in updated_collection['manifest_text'])
 
 class FuseListLargeProjectContents(MountTestBase):
     @profiled
-    def listLargeProjectContents(self):
+    def getProjectWithManyCollections(self):
+        project_contents = llfuse.listdir(self.mounttmp)
+        self.assertEqual(201, len(project_contents))
+        self.assertIn('Collection_1', project_contents)
+
+    @profiled
+    def listContentsInProjectWithManyCollections(self):
         project_contents = llfuse.listdir(self.mounttmp)
         self.assertEqual(201, len(project_contents))
         self.assertIn('Collection_1', project_contents)
@@ -418,7 +483,9 @@ class FuseListLargeProjectContents(MountTestBase):
             collection_contents = llfuse.listdir(os.path.join(self.mounttmp, collection_name))
             self.assertIn('baz', collection_contents)
 
+    @slow_test
     def test_listLargeProjectContents(self):
         self.make_mount(fuse.ProjectDirectory,
                         project_object=run_test_server.fixture('groups')['project_with_201_collections'])
-        self.listLargeProjectContents()
+        self.getProjectWithManyCollections()
+        self.listContentsInProjectWithManyCollections()