6219: some more magic dir based testing
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
@profiled
def fuse_CreateCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
    """Create ``streams`` directories of files underneath ``mounttmp``.

    For every stream index i a directory ``stream<i>`` is created, and
    inside it ``files_per_stream`` files named ``file<j>.txt`` are written,
    each containing ``data``.

    Arguments:
      mounttmp -- directory under which the stream directories are created
      streams -- number of ``stream<i>`` directories to create
      files_per_stream -- number of files written into each directory
      blocks_per_file, bytes_per_block -- accepted but not read by this
          function; the caller is expected to have sized ``data`` already
      data -- string written to every file
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(streams):
                # Build the directory path once per stream and reuse it
                # for every file created inside that stream.
                stream_dir = os.path.join(mounttmp, "./stream" + str(i))
                os.mkdir(stream_dir)

                for j in range(files_per_stream):
                    with open(os.path.join(stream_dir, "file" + str(j) + ".txt"), "w") as f:
                        f.write(data)

    Test().runTest()
39
@profiled
def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams, files_per_stream, content):
    """Read every file of every stream under ``mounttmp`` and check its content.

    Arguments:
      mounttmp -- directory containing the ``stream<i>`` directories
      streams -- number of ``stream<i>`` directories to visit
      files_per_stream -- number of ``file<j>.txt`` files read per directory
      content -- string each file is expected to contain

    Raises AssertionError (through unittest) when a file's content differs
    from ``content``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(streams):
                stream_dir = os.path.join(mounttmp, 'stream' + str(i))

                # The listing itself is not inspected; the call is kept so
                # that listing the directory stays part of the timed work.
                llfuse.listdir(stream_dir)

                for j in range(files_per_stream):
                    # Select the file by j (the file index), not by i (the
                    # stream index), so every file is read exactly once.
                    with open(os.path.join(stream_dir, 'file' + str(j) + '.txt')) as f:
                        self.assertEqual(content, f.read())

    Test().runTest()
51
@profiled
def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move ``filename`` from ``mounttmp/stream`` up into ``mounttmp``.

    The directory listings are checked before and after the rename: the
    file must be listed in the stream directory beforehand, and afterwards
    it must be listed in ``mounttmp`` and no longer in the stream directory.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            top_dir = os.path.join(mounttmp)

            # Precondition: the file starts out inside the stream directory.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            source = os.path.join(mounttmp, stream, filename)
            destination = os.path.join(mounttmp, filename)
            os.rename(source, destination)

            # Postcondition: listed at the top level, gone from the stream.
            self.assertIn(filename, llfuse.listdir(top_dir))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
68
@profiled
def fuse_DeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove ``filename`` from the ``stream`` directory under ``mounttmp``."""
    class Test(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    Test().runTest()
76
77 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Exercise create / read / move / delete on a mounted CollectionDirectory.

    The file operations run through ``self.pool`` using the module-level
    ``fuse_*`` helper functions; after each phase the collection record is
    fetched again from the API and its manifest text is checked.
    """

    def _fetch_manifest_text(self, collection):
        """Fetch ``collection`` from the API and return its manifest text."""
        record = self.api.collections().get(uuid=collection.manifest_locator()).execute()
        return record["manifest_text"]

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # Once file0.txt has been moved to the top level, the manifest is
        # expected to split into: the top-level stream line, one line per
        # stream directory, and one final element (presumably the empty
        # string that follows the trailing newline).
        expected_manifest_lines = streams + 2

        self.pool.apply(fuse_CreateCollectionWithManyFiles, (self.mounttmp, streams,
            files_per_stream, blocks_per_file, bytes_per_block, data))

        manifest_text = self._fetch_manifest_text(collection)

        for i in range(streams):
            self.assertIn('./stream' + str(i), manifest_text)

        for j in range(files_per_stream):
            self.assertIn('file' + str(j) + '.txt', manifest_text)

        # Read file contents
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(streams):
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        manifest_streams = self._fetch_manifest_text(collection).split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        # file0.txt now lives in the first manifest line only.
        self.assertIn('file0.txt', manifest_streams[0])

        for i in range(streams):
            stream_line = manifest_streams[i+1]
            self.assertNotIn('file0.txt', stream_line)
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', stream_line)

        # Delete 'file1.txt' from all the streams
        for i in range(streams):
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        manifest_text = self._fetch_manifest_text(collection)
        manifest_streams = manifest_text.split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        self.assertIn('file0.txt', manifest_streams[0])
        self.assertNotIn('file1.txt', manifest_text)

        for i in range(streams):
            stream_line = manifest_streams[i+1]
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', stream_line)
148
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename ``filename`` from the ``collection1`` directory into ``collection2``.

    Both collection directories are resolved relative to ``mounttmp``.
    ``stream`` is accepted but not used: the file is addressed directly
    inside each collection directory.
    """
    source = os.path.join(mounttmp, collection1, filename)
    destination = os.path.join(mounttmp, collection2, filename)
    os.rename(source, destination)
156
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete ``filename`` from the ``collection1`` directory under ``mounttmp``.

    ``stream`` is accepted but not used: the file is addressed directly
    inside the collection directory.
    """
    target = os.path.join(mounttmp, collection1, filename)
    os.remove(target)
163
164
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Exercise read / move / delete on collections via a MagicDirectory mount.

    Collections are created through the SDK, then reached inside the mount
    at ``<mounttmp>/<manifest locator>``. Each step is a separate
    ``@profiled`` method.
    """

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=1, files_per_stream=1,
            blocks_per_file=1, bytes_per_block=1, data='x'):
        """Create and save a collection holding ``files_per_stream`` files.

        The files are named ``file<j>.txt`` and each contains ``data``.
        ``streams``, ``blocks_per_file`` and ``bytes_per_block`` are
        accepted but not read by this method.

        Returns the saved collection object.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(files_per_stream):
            with collection.open("file" + str(j) + ".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1,
            blocks_per_file=1, bytes_per_block=1, data='x'):
        """Read every ``file<j>.txt`` of ``collection`` through the mount.

        ``collection`` is the directory name of the collection underneath
        ``self.mounttmp``. Each file's content must equal ``data``.
        ``streams``, ``blocks_per_file`` and ``bytes_per_block`` are
        accepted but not read by this method.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing is not inspected; the call is kept so that listing
        # the collection directory stays part of the timed work.
        os.listdir(collection_dir)

        for j in range(files_per_stream):
            with open(os.path.join(collection_dir, 'file' + str(j) + '.txt')) as f:
                self.assertEqual(data, f.read())

    @profiled
    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
        """Move file1.txt from ``from_collection`` to ``to_collection`` via the mount.

        Afterwards ``update()`` is called on both collection objects
        (presumably to pick up the change made through the mount -- confirm
        against the SDK).
        """
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
              to_collection.manifest_locator(), 'stream0', 'file1.txt',))
        from_collection.update()
        to_collection.update()

    @profiled
    def magicDirTest_removeFileFromCollection(self, collection):
        """Remove file0.txt from ``collection`` via the mount, then ``update()`` it."""
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file0.txt',))
        collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # collection1 is created with the defaults (a single file) and is
        # the destination of the move below.
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream,
                          blocks_per_file, bytes_per_block, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams,
            files_per_stream, blocks_per_file, bytes_per_block, data)

        # Move file1.txt out of the collection2 into collection1
        self.magicDirTest_moveFileFromCollection(collection2, collection1)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file0.txt', updated_collection['manifest_text'])

        # Delete file0.txt from collection2
        self.magicDirTest_removeFileFromCollection(collection2)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])