19464: Don't repeat the workflow id
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 from __future__ import absolute_import
6 from future.utils import viewitems
7 from builtins import str
8 from builtins import range
9 import arvados
10 import arvados_fuse as fuse
11 import llfuse
12 import logging
13 import os
14 import sys
15 import unittest
16 from .. import run_test_server
17 from ..mount_test_base import MountTestBase
18 from ..slow_test import slow_test
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from .performance_profiler import profiled
23
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Fill the mount at `mounttmp` with stream directories containing files.

    Creates directories `stream0` .. `stream<streams-1>` under `mounttmp`
    and writes `data` to `file0.txt` .. `file<files_per_stream-1>.txt` in
    each of them.  The work runs inside a throwaway TestCase so that the
    creation step is wrapped by the `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_no in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(stream_dir)

                # Populate the freshly created stream directory.
                for file_no in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_no) + ".txt")
                    with open(file_path, "w") as out:
                        out.write(data)

        def runTest(self):
            self.createCollectionWithMultipleBlocks()

    Test().runTest()
40
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream under `mounttmp` and verify it.

    Expects the layout `stream<i>/file<j>.txt` for `i` in `range(streams)`
    and `j` in `range(files_per_stream)`, and asserts that each file is
    listed in its stream directory and contains exactly `data`.  The work
    runs inside a throwaway TestCase so the read step is wrapped by the
    `profiled` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                listing = llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # The file name must follow the file index j; indexing by
                    # the stream index would re-read one file per stream and
                    # leave the others unchecked.
                    filename = 'file'+str(j)+'.txt'
                    self.assertIn(filename, listing)
                    with open(os.path.join(stream_dir, filename)) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
55
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Move `filename` out of directory `stream` into the root of `mounttmp`.

    The file is renamed to `moved_from_<stream>_<filename>`.  Directory
    listings are checked before and after the rename: the file must be in
    the stream first, then appear in the root under its new name and be
    gone from the stream.
    """
    class Test(unittest.TestCase):
        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # Precondition: the file is still where it was created.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # Postconditions: present in the root, absent from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

    Test().runTest()
75
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove `filename` from directory `stream` under `mounttmp`.

    The removal runs inside a throwaway TestCase so that it is wrapped by
    the `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

    Test().runTest()
86
87 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete multi-block files in a mounted collection.

    Each step that touches the mount is dispatched through `self.pool.apply`
    to one of the module-level `fuse_*CollectionWithMultipleBlocks` helpers;
    after each mutating step the collection record is fetched again through
    the API and its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # NOTE(review): 4 is presumably the root stream, the two streamN
        # entries and the empty string after the trailing newline - confirm.
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Every stream's file0.txt must be in the first manifest line under
        # the name the move helper gave it; checking the bare 'file0.txt'
        # would pass as soon as a single stream's file had been moved.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must have survived the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
158
159
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create `streams` directories under `mounttmp`, each holding many files.

    Directory `stream<i>` receives `file0.txt` .. `file<files_per_stream-1>.txt`,
    every one of them containing `data`.  The work runs inside a throwaway
    TestCase so the creation step is wrapped by the `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def createCollectionWithManyFiles(self):
            for stream_no in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(stream_dir)

                # Write the files belonging to this stream.
                for file_no in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_no) + ".txt")
                    with open(file_path, "w") as out:
                        out.write(data)

        def runTest(self):
            self.createCollectionWithManyFiles()

    Test().runTest()
176
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream under `mounttmp` and verify it.

    Expects the layout `stream<i>/file<j>.txt` for `i` in `range(streams)`
    and `j` in `range(files_per_stream)`, and asserts that each file is
    listed in its stream directory and contains exactly `data`.  The work
    runs inside a throwaway TestCase so the read step is wrapped by the
    `profiled` decorator.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                listing = llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # The file name must follow the file index j; indexing by
                    # the stream index would re-read one file per stream and
                    # leave the others unchecked.
                    filename = 'file'+str(j)+'.txt'
                    self.assertIn(filename, listing)
                    with open(os.path.join(stream_dir, filename)) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
191
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move `filename` out of directory `stream` into the root of `mounttmp`.

    The file is renamed to `moved_from_<stream>_<filename>`.  Directory
    listings are checked before and after the rename: the file must be in
    the stream first, then appear in the root under its new name and be
    gone from the stream.
    """
    class Test(unittest.TestCase):
        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # Precondition: the file is still where it was created.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # Postconditions: present in the root, absent from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

    Test().runTest()
211
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove `filename` from directory `stream` under `mounttmp`.

    The removal runs inside a throwaway TestCase so that it is wrapped by
    the `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

    Test().runTest()
222
223 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete files in a mounted many-file collection.

    Each step that touches the mount is dispatched through `self.pool.apply`
    to one of the module-level `fuse_*CollectionWithManyFiles` helpers;
    after each mutating step the collection record is fetched again through
    the API and its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @slow_test
    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # NOTE(review): 4 is presumably the root stream, the two streamN
        # entries and the empty string after the trailing newline - confirm.
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Every stream's file0.txt must be in the first manifest line under
        # the name the move helper gave it; checking the bare 'file0.txt'
        # would pass as soon as a single stream's file had been moved.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must have survived the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
291
292
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename `filename` from directory `collection1` to `collection2`.

    Both directories are resolved relative to `mounttmp`.  The `stream`
    argument is accepted but not used by the body.  The rename runs inside
    a throwaway TestCase so that it is wrapped by the `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_moveFileFromCollection(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

        def runTest(self):
            self.magicDirTest_moveFileFromCollection()

    Test().runTest()
303
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete `filename` from directory `collection1` under `mounttmp`.

    The `stream` argument is accepted but not used by the body.  The
    removal runs inside a throwaway TestCase so that it is wrapped by the
    `profiled` decorator.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_removeFileFromCollection(self):
            target = os.path.join(mounttmp, collection1, filename)
            os.remove(target)

        def runTest(self):
            self.magicDirTest_removeFileFromCollection()

    Test().runTest()
314
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections via a MagicDirectory mount.

    Collections are built through the SDK, then reached on the mount by
    their manifest locator; the move and delete steps are dispatched
    through `self.pool.apply` to the module-level `magicDirTest_*` helpers.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding `files_per_stream` files.

        The files are named `file<j>.txt` and each contains `data`.  With
        the default `files_per_stream=0` the saved collection is empty.
        `streams` is accepted but not used by the body.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Read every `file<j>.txt` of `collection` on the mount and compare to `data`.

        `collection` is the directory name under `self.mounttmp` (the test
        passes the manifest locator).  `streams` is accepted but not used.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing result is not inspected; the call is kept so that the
        # directory listing remains part of the profiled work.
        mount_ls = os.listdir(collection_dir)

        files = {}
        for j in range(0, files_per_stream):
            files[os.path.join(collection_dir, 'file'+str(j)+'.txt')] = data

        # `viewitems` is the helper imported from future.utils; the
        # previous spelling `viewItems` was an undefined name.
        for k, v in viewitems(files):
            # Keys are already full paths, so they are opened directly.
            with open(k) as f:
                self.assertEqual(v, f.read())

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
              collection1.manifest_locator(), 'stream0', 'file0.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertFalse('file0.txt' in updated_collection['manifest_text'])
        self.assertTrue('file1.txt' in updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertFalse('file1.txt' in updated_collection['manifest_text'])
        self.assertTrue('file2.txt' in updated_collection['manifest_text'])
368
369
def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
    """Rename `file0.txt` .. `file<files_per_stream-1>.txt` between two directories.

    Files are moved one by one from `from_collection` to `to_collection`,
    both resolved relative to `mounttmp`, inside a single profiled call.
    The `stream` argument is accepted but not used by the body.
    """
    class Test(unittest.TestCase):
        @profiled
        def magicDirTest_moveAllFilesFromCollection(self):
            source_dir = os.path.join(mounttmp, from_collection)
            target_dir = os.path.join(mounttmp, to_collection)
            for file_no in range(files_per_stream):
                name = 'file'+str(file_no)+'.txt'
                os.rename(os.path.join(source_dir, name), os.path.join(target_dir, name))

        def runTest(self):
            self.magicDirTest_moveAllFilesFromCollection()

    Test().runTest()
381
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another in a single profiled call.

    Collections are built through the SDK and reached on a MagicDirectory
    mount by their manifest locator.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Save and return a new collection holding `files_per_stream` files.

        The files are named `file<j>.txt` and each contains `data`.  With
        the default `files_per_stream=0` the saved collection is empty.
        `streams`, `blocks_per_file` and `bytes_per_block` are accepted but
        not used by the body.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files.  `data` is passed by
        # keyword: the third positional parameter is `blocks_per_file`, so a
        # positional `data` would be ignored as file content.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(
            streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
                  collection1.manifest_locator(), 'stream0', files_per_stream,))

        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertFalse(name in updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertTrue(name in updated_collection['manifest_text'])
422
423
424 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move the files of one collection into another, one pool call per file.

    Collections are built through the SDK and reached on a MagicDirectory
    mount by their manifest locator.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding `files_per_stream` files.

        The files are named `file<n>.txt` and each contains `data`;
        `streams` is accepted but not used by the body.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for file_no in range(files_per_stream):
            with new_collection.open("file"+str(file_no)+".txt", "w") as out:
                out.write(data)
        new_collection.save_new()
        return new_collection

    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move `file0.txt` .. `file<files_per_stream-1>.txt` with one pool call each."""
        for file_no in range(files_per_stream):
            move_args = (
                self.mounttmp,
                from_collection.manifest_locator(),
                to_collection.manifest_locator(),
                'stream0',
                'file'+str(file_no)+'.txt',
            )
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)

    @slow_test
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # collection1 starts out empty; collection2 holds all the files.
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Empty collection2 into collection1, file by file.
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        expected_names = ["file%i.txt" % n for n in range(files_per_stream)]

        # The source collection must no longer mention any of the files.
        source_record = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for file_name in expected_names:
            self.assertFalse(file_name in source_record['manifest_text'])

        # The destination collection must now mention all of them.
        target_record = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for file_name in expected_names:
            self.assertTrue(file_name in target_record['manifest_text'])
468
class FuseListLargeProjectContents(MountTestBase):
    """List a mounted project and the contents of every entry in it.

    The test mounts the `project_with_201_collections` fixture as a
    ProjectDirectory and runs the two profiled listing steps in turn.
    """

    @profiled
    def getProjectWithManyCollections(self):
        """List the project root and check the entry count and one known name."""
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)

    @profiled
    def listContentsInProjectWithManyCollections(self):
        """List the project root, then list every entry and expect a 'baz' in each."""
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)

        for entry_name in entries:
            entry_path = os.path.join(self.mounttmp, entry_name)
            self.assertIn('baz', llfuse.listdir(entry_path))

    @slow_test
    def test_listLargeProjectContents(self):
        project = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory, project_object=project)
        self.getProjectWithManyCollections()
        self.listContentsInProjectWithManyCollections()