6219: profile even smaller portions of code
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados_fuse as fuse
3 import llfuse
4 import logging
5 import os
6 import sys
7 import unittest
8 from .. import run_test_server
9 from ..mount_test_base import MountTestBase
10
11 logger = logging.getLogger('arvados.arv-mount')
12
13 from performance_profiler import profiled
14
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Populate the mount at *mounttmp* with stream directories full of files.

    Creates ``stream0`` .. ``stream<streams-1>`` below the mount point and
    writes *data* into ``file0.txt`` .. ``file<files_per_stream-1>.txt`` inside
    each of them.  The work is done by a method of a throwaway TestCase so
    that it can be wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithMultipleBlocks()

        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Fill the freshly created stream directory with files.
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as out:
                        out.write(data)

    Test().runTest()
31
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its content.

    For each ``stream<i>`` directory under *mounttmp* the directory is listed
    and then ``file0.txt`` .. ``file<files_per_stream-1>.txt`` are read; each
    one must contain exactly *data*.  The work runs inside a throwaway
    TestCase so that it can be wrapped by the ``profiled`` decorator and so
    that ``assertEqual`` is available.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # that listing the directory stays part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index the file by j (the file counter).  Using the
                    # stream counter here would read one file over and over
                    # and never touch the others.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
46
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Move *filename* out of the *stream* directory to the mount root.

    The file ends up directly under *mounttmp* with the name
    ``moved_from_<stream>_<filename>``.  Directory listings taken before and
    after the rename are checked to confirm the move.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # The file must be present in the stream before it is moved.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # It now shows up at the root under its new name ...
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))

            # ... and is gone from the stream it came from.
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
66
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove *filename* from the *stream* directory under *mounttmp*.

    The removal is performed by a method of a throwaway TestCase so that it
    can be wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            victim = os.path.join(mounttmp, stream, filename)
            os.remove(victim)

    Test().runTest()
77
78 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete large files through a mounted collection.

    Each file-system step is dispatched with ``self.pool.apply`` to one of the
    module-level ``fuse_*CollectionWithMultipleBlocks`` helpers; after every
    mutating step the collection record is fetched again through the API and
    its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        # Start from an empty saved collection and mount it.
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # One manifest line per stream ('.', stream0, stream1); the trailing
        # newline accounts for the fourth, empty element.
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Every stream contributed one file to '.', under the name the move
        # helper gave it.  Checking the per-stream name makes each iteration
        # verify something different.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The files moved earlier must still be present in '.'.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
148
149
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create *streams* directories under *mounttmp*, each holding many files.

    Directory ``stream<i>`` receives ``file0.txt`` ..
    ``file<files_per_stream-1>.txt``, every one containing *data*.  The work
    is done by a method of a throwaway TestCase so that it can be wrapped by
    the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithManyFiles()

        @profiled
        def createCollectionWithManyFiles(self):
            for stream_no in range(streams):
                target_dir = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(target_dir)

                # Write the files belonging to this stream.
                for file_no in range(files_per_stream):
                    target = os.path.join(target_dir, "file" + str(file_no) + ".txt")
                    with open(target, "w") as handle:
                        handle.write(data)

    Test().runTest()
166
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its content.

    For each ``stream<i>`` directory under *mounttmp* the directory is listed
    and then ``file0.txt`` .. ``file<files_per_stream-1>.txt`` are read; each
    one must contain exactly *data*.  The work runs inside a throwaway
    TestCase so that it can be wrapped by the ``profiled`` decorator and so
    that ``assertEqual`` is available.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # that listing the directory stays part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index the file by j (the file counter).  Using the
                    # stream counter here would read only one file per
                    # stream, files_per_stream times over.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
181
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Rename *filename* from the *stream* directory into the mount root.

    The new name is ``moved_from_<stream>_<filename>``, placed directly under
    *mounttmp*.  Listings before and after the rename confirm that the file
    left the stream and arrived at the root.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            source_dir = os.path.join(mounttmp, stream)
            new_name = 'moved_from_'+stream+'_'+filename

            # Precondition: the file is in the stream.
            before = llfuse.listdir(source_dir)
            self.assertIn(filename, before)

            os.rename(os.path.join(source_dir, filename), os.path.join(mounttmp, new_name))

            # Postconditions: present at the root, absent from the stream.
            root_entries = llfuse.listdir(os.path.join(mounttmp))
            self.assertIn(new_name, root_entries)

            after = llfuse.listdir(source_dir)
            self.assertNotIn(filename, after)

    Test().runTest()
201
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete *filename* from the *stream* directory under *mounttmp*.

    The removal is performed by a method of a throwaway TestCase so that it
    can be wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            doomed_path = os.path.join(mounttmp, stream, filename)
            os.remove(doomed_path)

    Test().runTest()
212
213 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete many small files through a mounted collection.

    Each file-system step is dispatched with ``self.pool.apply`` to one of the
    module-level ``fuse_*CollectionWithManyFiles`` helpers; after every
    mutating step the collection record is fetched again through the API and
    its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        # Start from an empty saved collection and mount it.
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200

        data = 'x'

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # One manifest line per stream ('.', stream0, stream1); the trailing
        # newline accounts for the fourth, empty element.
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Every stream contributed one file to '.', under the name the move
        # helper gave it.  Checking the per-stream name makes each iteration
        # verify something different.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The files moved earlier must still be present in '.'.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
281
282
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename *filename* from the top level of collection1 into collection2.

    Both collections are addressed as directories directly under *mounttmp*.
    *stream* is accepted but not used: the file is looked up at the root of
    the source collection and placed at the root of the destination.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

    Test().runTest()
290
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete *filename* from the top level of collection1 under *mounttmp*.

    *stream* is accepted but not used: the file is looked up at the root of
    the collection directory.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, collection1, filename)
            os.remove(target)

    Test().runTest()
297
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move one file between collections, then delete one, via a MagicDirectory mount.

    The ``magicDirTest_*`` helper methods are wrapped by ``profiled``; the
    file-system operations themselves are dispatched with ``self.pool.apply``
    to the module-level ``magicDirTest_*FileFromCollection`` functions.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding *files_per_stream* files.

        The files are named ``file<j>.txt``, created at the top level of the
        collection, and each contains *data*.  *streams* is accepted but not
        used.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Read ``file<j>.txt`` for every j from the mounted collection and compare with *data*.

        *collection* is the directory name of the collection under the mount
        point.  *streams* is accepted but not used.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing result is not inspected; the call is kept so that
        # listing the directory stays part of the profiled work.
        os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            # Build each path exactly once.  Joining the mount point onto an
            # already complete path only works when the mount point is
            # absolute.
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @profiled
    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
        """Move ``file0.txt`` from one collection to the other, then call update() on both."""
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
              to_collection.manifest_locator(), 'stream0', 'file0.txt',))
        from_collection.update()
        to_collection.update()

    @profiled
    def magicDirTest_removeFileFromCollection(self, collection):
        """Remove ``file1.txt`` from *collection*, then call update() on it."""
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file1.txt',))
        collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # With the default arguments this collection is created with no files.
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.magicDirTest_moveFileFromCollection(collection2, collection1)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.magicDirTest_removeFileFromCollection(collection2)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
362
363
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another via a MagicDirectory mount.

    All moves are issued first; ``update()`` is called on both collections
    once, after the last move.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Save and return a new collection holding *files_per_stream* files.

        The files are named ``file<j>.txt``, created at the top level of the
        collection, and each contains *data*.  *streams*, *blocks_per_file*
        and *bytes_per_block* are accepted but not used.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveAllFiles_moveFilesFromCollection(self, from_collection, to_collection, files_per_stream):
        """Move ``file0.txt`` .. ``file<files_per_stream-1>.txt`` across, then call update() on both."""
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
        from_collection.update()
        to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # With the default arguments this collection is created with no files.
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files.  data must be passed by
        # keyword: the third positional parameter of the helper is
        # blocks_per_file, not data.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveAllFiles_moveFilesFromCollection(collection2, collection1, files_per_stream)

        # Nothing may be left behind in the source collection ...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ... and every file must have arrived in the destination.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
410
411
412 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files between collections one by one via a MagicDirectory mount.

    ``update()`` is called on both collections after every single move.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding *files_per_stream* files.

        The files are named ``file<j>.txt``, created at the top level of the
        collection, and each contains *data*.  *streams* is accepted but not
        used.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move ``file0.txt`` .. ``file<files_per_stream-1>.txt`` across, calling update() on both after each move."""
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
            from_collection.update()
            to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # With the default arguments this collection is created with no files.
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        # Nothing may be left behind in the source collection ...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ... and every file must have arrived in the destination.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
458
class FuseListLargeProjectContents(MountTestBase):
    """List a mounted project fixture that is expected to contain 201 entries."""

    @profiled
    def getProjectWithManyCollections(self):
        """List the mount root, check its size and return the entry names."""
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)
        return entries

    @profiled
    def listContentsInProjectWithManyCollections(self, project_contents):
        """List the mount root and then every entry in it.

        The *project_contents* argument is replaced by a fresh listing before
        it is ever read.  Each entry is listed as a directory and must
        contain ``baz``.
        """
        project_contents = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(project_contents))
        self.assertIn('Collection_1', project_contents)

        for entry_name in project_contents:
            entry_path = os.path.join(self.mounttmp, entry_name)
            self.assertIn('baz', llfuse.listdir(entry_path))

    def test_listLargeProjectContents(self):
        project_fixture = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory,
                        project_object=project_fixture)
        listing = self.getProjectWithManyCollections()
        self.listContentsInProjectWithManyCollections(listing)