6219: refactor functions and profile smaller fuse related blocks of code
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados_fuse as fuse
3 import llfuse
4 import logging
5 import os
6 import sys
7 import unittest
8 from .. import run_test_server
9 from ..mount_test_base import MountTestBase
10
11 logger = logging.getLogger('arvados.arv-mount')
12
13 from performance_profiler import profiled
14
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Fill the mount at mounttmp with stream directories containing files.

    Makes the directories stream0 .. stream<streams-1> under mounttmp and,
    inside each one, writes `data` to file0.txt .. file<files_per_stream-1>.txt.
    The work lives in a throwaway TestCase method so that it can be wrapped
    by the @profiled decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithMultipleBlocks()

        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_no in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(stream_dir)

                # Write every file of this stream before moving to the next one.
                for file_no in range(files_per_stream):
                    target = os.path.join(stream_dir, "file" + str(file_no) + ".txt")
                    with open(target, "w") as out:
                        out.write(data)

    Test().runTest()
31
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream under mounttmp and compare it to data.

    For each of stream0 .. stream<streams-1> the directory is listed and then
    file0.txt .. file<files_per_stream-1>.txt are opened and their full
    contents asserted equal to `data`. The work lives in a throwaway TestCase
    method so that it can be wrapped by the @profiled decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so the
                # directory listing remains part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Select the file by j. Using the stream index here would
                    # re-read one file per stream and skip all the others.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
46
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Move mounttmp/<stream>/<filename> to the top of the mount and verify it.

    The file is renamed to 'moved_from_<stream>_<filename>' directly under
    mounttmp. Directory listings taken before and after the rename are used
    to assert that the file left the stream directory and arrived at the top
    level.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # The file must be present in its stream before the move.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # Afterwards it shows up at the top level and is gone from the stream.
            self.assertIn(moved_name, llfuse.listdir(mounttmp))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
66
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove mounttmp/<stream>/<filename> inside a @profiled TestCase method."""
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            doomed = os.path.join(mounttmp, stream, filename)
            os.remove(doomed)

    Test().runTest()
77
78 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete large files through a mounted collection.

    The collection gets 2 streams of 3 files each; every file holds
    2 * 2**26 bytes. After each mutation the collection record is fetched
    again through the API and its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Each stream's file0.txt was renamed to moved_from_stream<i>_file0.txt
        # by the move helper; check every one of them in the first manifest
        # line rather than repeating a single 'file0.txt' substring check.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
148
149
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create `streams` directories under mounttmp, each holding `files_per_stream` files.

    Directories are named stream<N> and files file<M>.txt; every file
    receives `data` as its contents. The work lives in a throwaway TestCase
    method so that it can be wrapped by the @profiled decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithManyFiles()

        @profiled
        def createCollectionWithManyFiles(self):
            for stream_index in range(streams):
                directory = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(directory)

                # Populate the directory that was just created.
                for file_index in range(files_per_stream):
                    file_name = "file" + str(file_index) + ".txt"
                    with open(os.path.join(directory, file_name), "w") as handle:
                        handle.write(data)

    Test().runTest()
166
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream under mounttmp and compare it to data.

    For each of stream0 .. stream<streams-1> the directory is listed and then
    file0.txt .. file<files_per_stream-1>.txt are opened and their full
    contents asserted equal to `data`. The work lives in a throwaway TestCase
    method so that it can be wrapped by the @profiled decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so the
                # directory listing remains part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Select the file by j. Using the stream index here would
                    # re-read one file per stream and skip all the others.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
181
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Rename mounttmp/<stream>/<filename> to mounttmp/moved_from_<stream>_<filename>.

    Directory listings before and after the rename are used to assert that
    the file existed in the stream, appears at the top of the mount, and no
    longer appears in the stream.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            source_dir = os.path.join(mounttmp, stream)
            new_name = 'moved_from_'+stream+'_'+filename

            before = llfuse.listdir(source_dir)
            self.assertIn(filename, before)

            os.rename(os.path.join(source_dir, filename), os.path.join(mounttmp, new_name))

            top_level = llfuse.listdir(mounttmp)
            self.assertIn(new_name, top_level)

            after = llfuse.listdir(source_dir)
            self.assertNotIn(filename, after)

    Test().runTest()
201
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete the file mounttmp/<stream>/<filename> inside a @profiled TestCase method."""
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            victim = os.path.join(mounttmp, stream, filename)
            os.remove(victim)

    Test().runTest()
212
213 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete many small files through a mounted collection.

    The collection gets 2 streams of 200 one-byte files each. After each
    mutation the collection record is fetched again through the API and its
    manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Each stream's file0.txt was renamed to moved_from_stream<i>_file0.txt
        # by the move helper; check every one of them in the first manifest
        # line rather than repeating a single 'file0.txt' substring check.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
280
281
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename mounttmp/<collection1>/<filename> to mounttmp/<collection2>/<filename>.

    The `stream` argument is accepted but not used by this helper. The rename
    runs inside a throwaway TestCase method wrapped by @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveFileFromCollection()

        @profiled
        def magicDirTest_moveFileFromCollection(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

    Test().runTest()
292
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Remove mounttmp/<collection1>/<filename>.

    The `stream` argument is accepted but not used by this helper. The removal
    runs inside a throwaway TestCase method wrapped by @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_removeFileFromCollection()

        @profiled
        def magicDirTest_removeFileFromCollection(self):
            target = os.path.join(mounttmp, collection1, filename)
            os.remove(target)

    Test().runTest()
303
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections through a MagicDirectory mount.

    Collections are built with the SDK Collection class, then reached on the
    mount by their manifest locator. After each operation the collection
    record is fetched again through the API and its manifest text is checked.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding file0.txt .. file<files_per_stream-1>.txt.

        Every file contains `data`. `streams` is accepted but not used here.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Check that each expected file is listed under the collection and contains `data`.

        `collection` is the directory name of the collection under the mount
        point. `streams` is accepted but not used here.
        """
        collection_dir = os.path.join(self.mounttmp, collection)
        mount_ls = os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            name = 'file'+str(j)+'.txt'
            # Use the listing instead of discarding it.
            self.assertIn(name, mount_ls)
            # Build the path once; joining a complete path onto the collection
            # directory again only works while mounttmp is absolute.
            with open(os.path.join(collection_dir, name)) as f:
                self.assertEqual(data, f.read())

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
              collection1.manifest_locator(), 'stream0', 'file0.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
356
357
def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
    """Rename file0.txt .. file<files_per_stream-1>.txt from one collection directory to another.

    Both collections are addressed as directories under mounttmp. The
    `stream` argument is accepted but not used by this helper. All renames
    run inside a single TestCase method wrapped by @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveAllFilesFromCollection()

        @profiled
        def magicDirTest_moveAllFilesFromCollection(self):
            for file_no in range(files_per_stream):
                file_name = 'file'+str(file_no)+'.txt'
                source = os.path.join(mounttmp, from_collection, file_name)
                destination = os.path.join(mounttmp, to_collection, file_name)
                os.rename(source, destination)

    Test().runTest()
369
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another through a MagicDirectory mount.

    All renames are issued by a single helper call; afterwards both collection
    records are fetched through the API and their manifest texts are checked.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Save and return a new collection holding file0.txt .. file<files_per_stream-1>.txt.

        Every file contains `data`. `streams`, `blocks_per_file` and
        `bytes_per_block` are accepted but not used here.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files. data goes by keyword: the
        # third positional slot of the helper is blocks_per_file, so passing
        # it positionally would leave the helper writing its default contents.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
                  collection1.manifest_locator(), 'stream0', files_per_stream,))

        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
410
411
412 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move the files of one collection into another, one helper call per file.

    Collections are reached through a MagicDirectory mount by their manifest
    locator; afterwards both collection records are fetched through the API
    and their manifest texts are checked.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection with `files_per_stream` files containing `data`.

        `streams` is accepted but not used here.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for file_no in range(files_per_stream):
            with new_collection.open("file" + str(file_no) + ".txt", "w") as out:
                out.write(data)
        new_collection.save_new()
        return new_collection

    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move file0.txt .. file<files_per_stream-1>.txt with one pool call per file."""
        for file_no in range(files_per_stream):
            # Locators are looked up again for every file, as part of each call.
            move_args = (
                self.mounttmp,
                from_collection.manifest_locator(),
                to_collection.manifest_locator(),
                'stream0',
                'file'+str(file_no)+'.txt',
            )
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)

    @profiled
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # An empty destination and a populated source collection.
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        source_record = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        expected_names = ["file%i.txt" % n for n in range(files_per_stream)]
        for expected in expected_names:
            self.assertFalse(expected in source_record['manifest_text'])

        target_record = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for expected in expected_names:
            self.assertTrue(expected in target_record['manifest_text'])
456
class FuseListLargeProjectContents(MountTestBase):
    """List a mounted project fixture and the contents of each entry in it."""

    @profiled
    def getProjectWithManyCollections(self):
        """List the mount point, check it has 201 entries, and return the listing."""
        listing = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(listing))
        self.assertIn('Collection_1', listing)
        return listing

    @profiled
    def listContentsInProjectWithManyCollections(self, project_contents):
        """List every entry of the project and check each one contains 'baz'."""
        # The listing passed in is replaced by a fresh one taken here.
        project_contents = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(project_contents))
        self.assertIn('Collection_1', project_contents)

        for collection_name in project_contents:
            collection_dir = os.path.join(self.mounttmp, collection_name)
            self.assertIn('baz', llfuse.listdir(collection_dir))

    def test_listLargeProjectContents(self):
        project = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory, project_object=project)
        listing = self.getProjectWithManyCollections()
        self.listContentsInProjectWithManyCollections(listing)