8784: Fix test for latest firefox.
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados_fuse as fuse
3 import llfuse
4 import logging
5 import os
6 import sys
7 import unittest
8 from .. import run_test_server
9 from ..mount_test_base import MountTestBase
10 from ..slow_test import slow_test
11
12 logger = logging.getLogger('arvados.arv-mount')
13
14 from performance_profiler import profiled
15
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create stream directories under the mount and write files into them.

    Makes one directory "stream<i>" per stream below ``mounttmp`` and, inside
    each, writes ``files_per_stream`` files named "file<j>.txt" containing
    ``data``.  The work runs inside a throwaway TestCase whose worker method
    is wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Fill the freshly created directory with its files.
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as out:
                        out.write(data)

        def runTest(self):
            self.createCollectionWithMultipleBlocks()

    Test().runTest()
32
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its contents.

    For each stream directory "stream<i>" below ``mounttmp`` the directory is
    listed, then each "file<j>.txt" (j in ``range(files_per_stream)``) is
    opened and its full contents are asserted equal to ``data``.  The work
    runs inside a throwaway TestCase whose worker method is wrapped by the
    ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so the
                # directory is still listed before its files are read.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index by j (the file), not i (the stream); using i would
                    # re-read the same file files_per_stream times and skip
                    # the rest.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
47
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Rename a file out of a stream directory into the mount root.

    The file ``filename`` in ``mounttmp/stream`` is renamed to
    "moved_from_<stream>_<filename>" directly under ``mounttmp``.  Directory
    listings before and after the rename are checked to confirm the move.
    """
    class Test(unittest.TestCase):
        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # The file must be present in its stream before the move.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # Afterwards it shows up in the root and is gone from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

    Test().runTest()
67
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove ``filename`` from the ``stream`` directory under ``mounttmp``.

    The removal runs inside a throwaway TestCase whose worker method is
    wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

    Test().runTest()
78
79 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete multi-block files through a FUSE mount.

    A new empty collection is mounted writable; files are then created,
    read back, moved to the mount root and deleted via the module-level
    ``fuse_*WithMultipleBlocks`` helpers, and the collection's manifest text
    is re-fetched from the API after each mutating step to check the result.
    """
    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        # Number of pieces expected from manifest_text.split('\n') once files
        # live in the root: presumably one line for the "." stream, one per
        # stream directory, and an empty string after the trailing newline.
        expected_manifest_lines = streams + 2

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        # Each stream's file0.txt must now appear in the first manifest line
        # under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
150
151
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create ``streams`` directories, each holding ``files_per_stream`` files.

    Directories are named "stream<i>" and files "file<j>.txt"; every file is
    written with ``data``.  The work runs inside a throwaway TestCase whose
    worker method is wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def createCollectionWithManyFiles(self):
            for stream_no in range(streams):
                directory = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(directory)

                # Write every file belonging to this stream.
                for file_no in range(files_per_stream):
                    target = os.path.join(directory, "file" + str(file_no) + ".txt")
                    with open(target, "w") as handle:
                        handle.write(data)

        def runTest(self):
            self.createCollectionWithManyFiles()

    Test().runTest()
168
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its contents.

    For each stream directory "stream<i>" below ``mounttmp`` the directory is
    listed, then each "file<j>.txt" (j in ``range(files_per_stream)``) is
    opened and its full contents are asserted equal to ``data``.  The work
    runs inside a throwaway TestCase whose worker method is wrapped by the
    ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so the
                # directory is still listed before its files are read.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index by j (the file), not i (the stream); using i would
                    # re-read the same file files_per_stream times and skip
                    # the rest.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
183
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Rename ``filename`` from ``stream`` into the root of the mount.

    The destination name is "moved_from_<stream>_<filename>".  Listings of
    the stream directory and of the mount root are asserted on before and
    after the rename.
    """
    class Test(unittest.TestCase):
        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            source_dir = os.path.join(mounttmp, stream)
            new_name = 'moved_from_'+stream+'_'+filename

            # Precondition: the file is listed in its stream directory.
            listing = llfuse.listdir(source_dir)
            self.assertIn(filename, listing)

            os.rename(os.path.join(source_dir, filename), os.path.join(mounttmp, new_name))

            # Postconditions: present in the root, absent from the stream.
            listing = llfuse.listdir(os.path.join(mounttmp))
            self.assertIn(new_name, listing)

            listing = llfuse.listdir(source_dir)
            self.assertNotIn(filename, listing)

        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

    Test().runTest()
203
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete ``mounttmp/stream/filename``.

    The removal runs inside a throwaway TestCase whose worker method is
    wrapped by the ``profiled`` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            doomed_path = os.path.join(mounttmp, stream, filename)
            os.remove(doomed_path)

        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

    Test().runTest()
214
215 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete many small files through a FUSE mount.

    A new empty collection is mounted writable; files are then created,
    read back, moved to the mount root and deleted via the module-level
    ``fuse_*WithManyFiles`` helpers, and the collection's manifest text is
    re-fetched from the API after each mutating step to check the result.
    """
    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        # Number of pieces expected from manifest_text.split('\n') once files
        # live in the root: presumably one line for the "." stream, one per
        # stream directory, and an empty string after the trailing newline.
        expected_manifest_lines = streams + 2

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        # Each stream's file0.txt must now appear in the first manifest line
        # under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_lines, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
283
284
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename ``filename`` from ``collection1`` into ``collection2``.

    Both collections are addressed as directories directly below
    ``mounttmp``.  The ``stream`` argument is accepted but not used.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_moveFileFromCollection(self):
            source_path = os.path.join(mounttmp, collection1, filename)
            dest_path = os.path.join(mounttmp, collection2, filename)
            os.rename(source_path, dest_path)

        def runTest(self):
            self.magicDirTest_moveFileFromCollection()

    Test().runTest()
295
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete ``filename`` from the ``collection1`` directory under ``mounttmp``.

    The ``stream`` argument is accepted but not used.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_removeFileFromCollection(self):
            victim = os.path.join(mounttmp, collection1, filename)
            os.remove(victim)

        def runTest(self):
            self.magicDirTest_removeFileFromCollection()

    Test().runTest()
306
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections via a MagicDirectory mount.

    Two collections are created through the SDK (one empty, one with many
    files), a ``fuse.MagicDirectory`` is mounted, and files are read, moved
    and removed by addressing each collection by its manifest locator.
    """
    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Create and save a collection of ``files_per_stream`` files holding ``data``.

        Files are named "file<j>.txt" and written at the top level of the
        collection; ``streams`` is accepted but not used.  Returns the saved
        collection object.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Read every "file<j>.txt" of ``collection`` through the mount and compare to ``data``.

        ``collection`` is the directory name of the collection below the
        mount point; ``streams`` is accepted but not used.
        """
        collection_dir = os.path.join(self.mounttmp, collection)
        # The listing is not inspected; the call is kept so the collection
        # directory is still listed before its files are opened.
        os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
              collection1.manifest_locator(), 'stream0', 'file0.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
360
361
def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
    """Rename "file<j>.txt" for every j from one collection directory to another.

    Both collections are addressed as directories directly below
    ``mounttmp``; j runs over ``range(files_per_stream)``.  The ``stream``
    argument is accepted but not used.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_moveAllFilesFromCollection(self):
            for file_index in range(files_per_stream):
                name = 'file'+str(file_index)+'.txt'
                os.rename(os.path.join(mounttmp, from_collection, name),
                          os.path.join(mounttmp, to_collection, name))

        def runTest(self):
            self.magicDirTest_moveAllFilesFromCollection()

    Test().runTest()
373
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another in a single pooled call.

    Two collections are created through the SDK (one empty, one with many
    files), a ``fuse.MagicDirectory`` is mounted, all files are renamed
    across, and both manifests are re-fetched from the API to check that
    the file names left the source and arrived in the destination.
    """
    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a collection of ``files_per_stream`` files holding ``data``.

        Files are named "file<j>.txt".  ``streams``, ``blocks_per_file`` and
        ``bytes_per_block`` are accepted but not used.  Returns the saved
        collection object.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files.  data is passed by keyword:
        # the third positional parameter of the helper is blocks_per_file,
        # so a positional data would never reach the data parameter.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
                  collection1.manifest_locator(), 'stream0', files_per_stream,))

        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
414
415
416 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files between collections one pooled call at a time.

    Two collections are created through the SDK (one empty, one with many
    files), a ``fuse.MagicDirectory`` is mounted, each file is renamed across
    with its own ``self.pool.apply`` call, and both manifests are re-fetched
    from the API to check where the file names ended up.
    """
    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Create and save a collection of ``files_per_stream`` files holding ``data``.

        ``streams`` is accepted but not used.  Returns the saved collection.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for file_index in range(files_per_stream):
            with new_collection.open("file"+str(file_index)+".txt", "w") as writer:
                writer.write(data)
        new_collection.save_new()
        return new_collection

    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move "file<j>.txt" for each j from one collection to the other, one call per file."""
        for file_index in range(files_per_stream):
            move_args = (self.mounttmp, from_collection.manifest_locator(),
                         to_collection.manifest_locator(), 'stream0', 'file'+str(file_index)+'.txt',)
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams, files_per_stream, data = 2, 200, 'x'

        # collection1 starts empty; collection2 holds the files to be moved.
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        source_record = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertFalse(name in source_record['manifest_text'])

        dest_record = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertTrue(name in dest_record['manifest_text'])
460
class FuseListLargeProjectContents(MountTestBase):
    """List a mounted project fixture that is expected to hold 201 entries."""

    @profiled
    def getProjectWithManyCollections(self):
        """List the mount root, check its size and one known entry, and return the listing."""
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)
        return entries

    @profiled
    def listContentsInProjectWithManyCollections(self, project_contents):
        """List the mount root again, then list every entry inside it.

        The ``project_contents`` argument is not consulted: a fresh listing
        of the mount root is taken and used instead.
        """
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)

        # Every entry of the project is expected to contain 'baz'.
        for entry_name in entries:
            entry_path = os.path.join(self.mounttmp, entry_name)
            self.assertIn('baz', llfuse.listdir(entry_path))

    @slow_test
    def test_listLargeProjectContents(self):
        project_fixture = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory,
                        project_object=project_fixture)
        listing = self.getProjectWithManyCollections()
        self.listContentsInProjectWithManyCollections(listing)