Merge branch '8784-dir-listings'
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 import arvados
6 import arvados_fuse as fuse
7 import llfuse
8 import logging
9 import os
10 import sys
11 import unittest
12 from .. import run_test_server
13 from ..mount_test_base import MountTestBase
14 from ..slow_test import slow_test
15
16 logger = logging.getLogger('arvados.arv-mount')
17
18 from performance_profiler import profiled
19
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Populate the mount with stream directories and data files.

    Creates ``streams`` directories named ``stream<i>`` under ``mounttmp``
    and, inside each, writes ``data`` to ``files_per_stream`` files named
    ``file<j>.txt``.  The work runs inside a throwaway ``unittest.TestCase``
    whose worker method is decorated with ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithMultipleBlocks()

        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Fill the freshly created stream directory with its files.
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as f:
                        f.write(data)

    Test().runTest()
36
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its contents.

    For each ``stream<i>`` directory under ``mounttmp`` the directory is
    listed once, then each ``file<j>.txt`` (``j`` in
    ``range(files_per_stream)``) is opened and its whole content is compared
    with ``data``.  The work runs inside a throwaway ``unittest.TestCase``
    whose worker method is decorated with ``@profiled``.

    Raises ``AssertionError`` (via ``assertEqual``) when a file's content
    differs from ``data``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # that listing the directory stays part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Select the file by its own index j (not the stream
                    # index i) so that every file in the stream is read.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
51
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Move ``filename`` out of ``stream`` into the top level of the mount.

    The file is renamed to ``moved_from_<stream>_<filename>`` directly under
    ``mounttmp``.  Directory listings taken before and after the rename are
    used to assert that the file was present in the stream, shows up under
    its new name at the top level, and is gone from the stream.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)

            # Precondition: the file exists in its stream.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            moved_name = 'moved_from_' + stream + '_' + filename
            os.rename(os.path.join(stream_dir, filename),
                      os.path.join(mounttmp, moved_name))

            # Postconditions: visible at the top level, absent from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
71
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove ``filename`` from the ``stream`` directory under ``mounttmp``.

    The removal runs inside a throwaway ``unittest.TestCase`` whose worker
    method is decorated with ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    Test().runTest()
82
83 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete multi-block files through a mount.

    The test mounts a new, empty collection as a writable
    ``fuse.CollectionDirectory``, fills it through the mount, and checks the
    collection's ``manifest_text`` (fetched through the API) after each
    phase.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        # Splitting the manifest on '\n' yields the root stream line, one
        # line per stream directory, and a trailing empty string.
        expected_manifest_parts = streams + 2

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_parts, len(manifest_streams))

        # Each stream's file0.txt must now be listed in the root stream
        # under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_parts, len(manifest_streams))

        # The previously moved files must be unaffected by the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
154
155
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create ``streams`` directories each holding ``files_per_stream`` files.

    Directories are named ``stream<i>`` and files ``file<j>.txt``; every
    file receives ``data`` as its content.  The work runs inside a throwaway
    ``unittest.TestCase`` whose worker method is decorated with
    ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithManyFiles()

        @profiled
        def createCollectionWithManyFiles(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Write each file of this stream in turn.
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as f:
                        f.write(data)

    Test().runTest()
172
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its contents.

    For each ``stream<i>`` directory under ``mounttmp`` the directory is
    listed once, then each ``file<j>.txt`` (``j`` in
    ``range(files_per_stream)``) is opened and its whole content is compared
    with ``data``.  The work runs inside a throwaway ``unittest.TestCase``
    whose worker method is decorated with ``@profiled``.

    Raises ``AssertionError`` (via ``assertEqual``) when a file's content
    differs from ``data``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # that listing the directory stays part of the profiled work.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Select the file by its own index j (not the stream
                    # index i) so that all files are read, not just one.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
187
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move ``filename`` out of ``stream`` into the top level of the mount.

    The file is renamed to ``moved_from_<stream>_<filename>`` directly under
    ``mounttmp``.  Directory listings taken before and after the rename are
    used to assert that the file was present in the stream, shows up under
    its new name at the top level, and is gone from the stream.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            stream_dir = os.path.join(mounttmp, stream)

            # Precondition: the file exists in its stream.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            moved_name = 'moved_from_' + stream + '_' + filename
            os.rename(os.path.join(stream_dir, filename),
                      os.path.join(mounttmp, moved_name))

            # Postconditions: visible at the top level, absent from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
207
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove ``filename`` from the ``stream`` directory under ``mounttmp``.

    The removal runs inside a throwaway ``unittest.TestCase`` whose worker
    method is decorated with ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    Test().runTest()
218
219 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete many small files through a mount.

    The test mounts a new, empty collection as a writable
    ``fuse.CollectionDirectory``, fills it through the mount, and checks the
    collection's ``manifest_text`` (fetched through the API) after each
    phase.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        # Splitting the manifest on '\n' yields the root stream line, one
        # line per stream directory, and a trailing empty string.
        expected_manifest_parts = streams + 2

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_parts, len(manifest_streams))

        # Each stream's file0.txt must now be listed in the root stream
        # under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(expected_manifest_parts, len(manifest_streams))

        # The previously moved files must be unaffected by the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
287
288
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename ``filename`` from the ``collection1`` directory into ``collection2``.

    Both directories are resolved relative to ``mounttmp`` and the file keeps
    its name.  The ``stream`` argument is accepted but not used.  The rename
    runs inside a throwaway ``unittest.TestCase`` whose worker method is
    decorated with ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveFileFromCollection()

        @profiled
        def magicDirTest_moveFileFromCollection(self):
            source_path = os.path.join(mounttmp, collection1, filename)
            destination_path = os.path.join(mounttmp, collection2, filename)
            os.rename(source_path, destination_path)

    Test().runTest()
299
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Remove ``filename`` from the ``collection1`` directory under ``mounttmp``.

    The ``stream`` argument is accepted but not used.  The removal runs
    inside a throwaway ``unittest.TestCase`` whose worker method is decorated
    with ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_removeFileFromCollection()

        @profiled
        def magicDirTest_removeFileFromCollection(self):
            target = os.path.join(mounttmp, collection1, filename)
            os.remove(target)

    Test().runTest()
310
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections via ``fuse.MagicDirectory``.

    Two collections are created through the SDK, the magic directory is
    mounted, and files are moved/removed through the mount.  The result is
    verified against the ``manifest_text`` fetched through the API.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        # Creates and saves a collection holding ``files_per_stream`` files
        # named file<j>.txt, each containing ``data``.  ``streams`` is
        # accepted but not used.  Returns the saved collection.
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        # Lists the collection's directory under the mount, then reads every
        # file<j>.txt and compares its content with ``data``.  ``streams``
        # is accepted but not used.
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing is not inspected; the call is kept so that listing the
        # directory stays part of the profiled work.
        os.listdir(collection_dir)

        # Build each path exactly once.  (Joining the mount path onto an
        # already-joined path is only correct when the mount path is
        # absolute.)
        for j in range(0, files_per_stream):
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty destination collection
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
              collection1.manifest_locator(), 'stream0', 'file0.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
364
365
def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
    """Rename ``file<j>.txt`` for every ``j`` from one collection directory to another.

    Files ``file0.txt`` .. ``file<files_per_stream - 1>.txt`` are renamed
    one by one from ``from_collection`` to ``to_collection`` (both resolved
    under ``mounttmp``), keeping their names.  The ``stream`` argument is
    accepted but not used.  The loop runs inside a throwaway
    ``unittest.TestCase`` whose worker method is decorated with
    ``@profiled``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveAllFilesFromCollection()

        @profiled
        def magicDirTest_moveAllFilesFromCollection(self):
            for file_index in range(files_per_stream):
                file_name = 'file' + str(file_index) + '.txt'
                source_path = os.path.join(mounttmp, from_collection, file_name)
                destination_path = os.path.join(mounttmp, to_collection, file_name)
                os.rename(source_path, destination_path)

    Test().runTest()
377
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another via ``fuse.MagicDirectory``.

    All moves happen inside a single pooled helper call.  The result is
    verified against the ``manifest_text`` of both collections fetched
    through the API.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        # Creates and saves a collection holding ``files_per_stream`` files
        # named file<j>.txt, each containing ``data``.  ``streams``,
        # ``blocks_per_file`` and ``bytes_per_block`` are accepted but not
        # used.  Returns the saved collection.
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty destination collection
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files.  ``data`` must be passed by
        # keyword: the helper's third positional parameter is
        # ``blocks_per_file``, not ``data``.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(
            streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
                  collection1.manifest_locator(), 'stream0', files_per_stream,))

        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # The source collection must no longer mention any of the files ...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ... and the destination collection must mention all of them.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
418
419
420 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files one at a time between collections via ``fuse.MagicDirectory``.

    Each file is moved by its own pooled helper call.  The result is
    verified against the ``manifest_text`` of both collections fetched
    through the API.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        # Creates and saves a collection holding ``files_per_stream`` files
        # named file<j>.txt, each containing ``data``.  ``streams`` is
        # accepted but not used.  Returns the saved collection.
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        # Issues one pooled move per file, file0.txt .. file<n-1>.txt, from
        # ``from_collection`` into ``to_collection``.
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty destination collection
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # assertIn/assertNotIn report the offending name on failure, which
        # assertTrue/assertFalse on a membership expression do not.
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
464
class FuseListLargeProjectContents(MountTestBase):
    """List a project mounted as ``fuse.ProjectDirectory`` and each collection in it.

    The project comes from the ``project_with_201_collections`` entry of the
    ``groups`` fixture.
    """

    @profiled
    def getProjectWithManyCollections(self):
        # Lists the mount root, checks that it has 201 entries including
        # 'Collection_1', and returns the listing.
        listing = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(listing))
        self.assertIn('Collection_1', listing)
        return listing

    @profiled
    def listContentsInProjectWithManyCollections(self, project_contents):
        # The ``project_contents`` argument is not consulted: the mount root
        # is listed afresh and that listing is what gets checked and walked.
        fresh_listing = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(fresh_listing))
        self.assertIn('Collection_1', fresh_listing)

        # Every entry of the project must contain an entry named 'baz'.
        for entry_name in fresh_listing:
            entry_path = os.path.join(self.mounttmp, entry_name)
            self.assertIn('baz', llfuse.listdir(entry_path))

    @slow_test
    def test_listLargeProjectContents(self):
        project = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory, project_object=project)
        project_contents = self.getProjectWithManyCollections()
        self.listContentsInProjectWithManyCollections(project_contents)