Merge branch '6221-write-trash-list'
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados_fuse as fuse
3 import llfuse
4 import logging
5 import os
6 import sys
7 import unittest
8 from .. import run_test_server
9 from ..mount_test_base import MountTestBase
10
11 logger = logging.getLogger('arvados.arv-mount')
12
13 from performance_profiler import profiled
14
def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Populate the mount at `mounttmp` with stream directories and files.

    Makes `streams` directories (stream0, stream1, ...) under `mounttmp` and
    writes `files_per_stream` files (file0.txt, file1.txt, ...) holding `data`
    into each of them.  The work is done by a method of a throwaway
    unittest.TestCase that is decorated with @profiled (imported from
    performance_profiler; what it records is not visible in this file).
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithMultipleBlocks()

        @profiled
        def createCollectionWithMultipleBlocks(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Fill the freshly created directory with its files.
                for file_index in range(files_per_stream):
                    file_name = "file" + str(file_index) + ".txt"
                    with open(os.path.join(stream_dir, file_name), "w") as out:
                        out.write(data)

    Test().runTest()
31
def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check that it equals `data`.

    For each of the `streams` directories (stream0, stream1, ...) under
    `mounttmp`, list the directory and then open file0.txt ..
    file<files_per_stream - 1>.txt, asserting that each file's full content
    is `data`.  The work runs in a throwaway unittest.TestCase method that is
    decorated with @profiled (imported from performance_profiler).

    Raises AssertionError (via unittest) if any file's content differs.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithMultipleBlocks()

        @profiled
        def readContentsFromCollectionWithMultipleBlocks(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # the profiled work still includes one listing per stream.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Name the file by the file index j.  Using the stream
                    # index here would re-read one file per stream over and
                    # over and leave the others unchecked.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
46
def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Rename `filename` out of directory `stream` into the top of the mount.

    The file ends up at <mounttmp>/moved_from_<stream>_<filename>.  Directory
    listings taken before and after the rename are used to assert that the
    file was present in `stream`, now exists under its new name at the top
    level, and is gone from `stream`.  The work runs in a throwaway
    unittest.TestCase method decorated with @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithMultipleBlocks()

        @profiled
        def moveFileFromCollectionWithMultipleBlocks(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # The file must be there before we try to move it.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # New name shows up at the top level ...
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))

            # ... and the old name has disappeared from the stream directory.
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
66
def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
    """Remove <mounttmp>/<stream>/<filename> from the mounted filesystem.

    The removal runs in a throwaway unittest.TestCase method decorated with
    @profiled; no assertions are made here.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithMultipleBlocks()

        @profiled
        def deleteFileFromCollectionWithMultipleBlocks(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    Test().runTest()
77
78 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete large files through a writable collection mount.

    Filesystem operations are dispatched with self.pool.apply (self.pool is
    not set up in this class; presumably MountTestBase provides it).  After
    each mutating step the collection record is fetched again from the API
    and its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # Expected element count after splitting on newlines: the first
        # element holds the moved files, then one element per stream
        # directory, plus one more (presumably the empty string after the
        # manifest's final newline -- TODO confirm).
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(streams + 2, len(manifest_streams))

        # Each stream's file0.txt must now appear in the first manifest
        # stream under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(streams + 2, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
148
149
def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create `streams` directories under `mounttmp`, each holding many files.

    Every directory streamN receives `files_per_stream` files named
    file0.txt, file1.txt, ... whose content is `data`.  The creation loop is
    a method of a throwaway unittest.TestCase and is decorated with
    @profiled (imported from performance_profiler).
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.createCollectionWithManyFiles()

        @profiled
        def createCollectionWithManyFiles(self):
            for s in range(streams):
                target_dir = os.path.join(mounttmp, "./stream" + str(s))
                os.mkdir(target_dir)

                # Write each file of this stream in turn.
                for n in range(files_per_stream):
                    path = os.path.join(target_dir, "file" + str(n) + ".txt")
                    with open(path, "w") as handle:
                        handle.write(data)

    Test().runTest()
166
def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read every file of every stream under `mounttmp` and compare it to `data`.

    Each directory stream0 .. stream<streams - 1> is listed once, then
    file0.txt .. file<files_per_stream - 1>.txt inside it are opened and
    their full content is asserted to equal `data`.  The work runs in a
    throwaway unittest.TestCase method decorated with @profiled (imported
    from performance_profiler).

    Raises AssertionError (via unittest) if any file's content differs.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.readContentsFromCollectionWithManyFiles()

        @profiled
        def readContentsFromCollectionWithManyFiles(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing itself is not inspected; the call is kept so
                # the profiled work still includes one listing per stream.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Name the file by the file index j.  Using the stream
                    # index here would read the same single file
                    # files_per_stream times and skip all the others.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
181
def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move `filename` from directory `stream` to the top level of the mount.

    The destination name is moved_from_<stream>_<filename>.  Listings of the
    source directory and of `mounttmp` are used to assert the file existed
    before the rename, exists under the new name afterwards, and no longer
    appears in `stream`.  Runs inside a throwaway unittest.TestCase method
    decorated with @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.moveFileFromCollectionWithManyFiles()

        @profiled
        def moveFileFromCollectionWithManyFiles(self):
            source_dir = os.path.join(mounttmp, stream)
            new_name = 'moved_from_'+stream+'_'+filename

            before = llfuse.listdir(source_dir)
            self.assertIn(filename, before)

            os.rename(os.path.join(source_dir, filename), os.path.join(mounttmp, new_name))

            top_level = llfuse.listdir(os.path.join(mounttmp))
            self.assertIn(new_name, top_level)

            after = llfuse.listdir(source_dir)
            self.assertNotIn(filename, after)

    Test().runTest()
201
def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete the file `filename` found in directory `stream` of the mount.

    The os.remove call is made from a throwaway unittest.TestCase method
    decorated with @profiled; nothing is asserted here.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.deleteFileFromCollectionWithManyFiles()

        @profiled
        def deleteFileFromCollectionWithManyFiles(self):
            doomed = os.path.join(mounttmp, stream, filename)
            os.remove(doomed)

    Test().runTest()
212
213 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete files in a writable collection mount with many small files.

    Filesystem operations are dispatched with self.pool.apply (self.pool is
    not set up in this class; presumably MountTestBase provides it).  After
    each mutating step the collection record is fetched again from the API
    and its manifest text is checked.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(0, streams):
            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        # Expected element count after splitting on newlines: the first
        # element holds the moved files, then one element per stream
        # directory, plus one more (presumably the empty string after the
        # manifest's final newline -- TODO confirm).
        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(streams + 2, len(manifest_streams))

        # Each stream's file0.txt must now appear in the first manifest
        # stream under the name the move helper gave it.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        for i in range(0, streams):
            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(streams + 2, len(manifest_streams))

        # The previously moved files must survive the deletions.
        for i in range(0, streams):
            self.assertIn('moved_from_stream' + str(i) + '_file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
280
281
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename `filename` from directory `collection1` into directory `collection2`.

    Both directories are addressed relative to `mounttmp`.  The `stream`
    argument is accepted but not used.  The rename runs in a throwaway
    unittest.TestCase method decorated with @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveFileFromCollection()

        @profiled
        def magicDirTest_moveFileFromCollection(self):
            source_path = os.path.join(mounttmp, collection1, filename)
            destination_path = os.path.join(mounttmp, collection2, filename)
            os.rename(source_path, destination_path)

    Test().runTest()
292
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Remove `filename` from directory `collection1` under `mounttmp`.

    The `stream` argument is accepted but not used.  The removal runs in a
    throwaway unittest.TestCase method decorated with @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_removeFileFromCollection()

        @profiled
        def magicDirTest_removeFileFromCollection(self):
            victim = os.path.join(mounttmp, collection1, filename)
            os.remove(victim)

    Test().runTest()
303
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections reached through a MagicDirectory mount.

    Collections are built with the SDK, then accessed below self.mounttmp by
    their manifest locator.  Mutations are dispatched with self.pool.apply
    (self.pool is not set up in this class; presumably MountTestBase
    provides it) and verified against the manifest text fetched from the API.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection with `files_per_stream` files containing `data`.

        Files are named file0.txt, file1.txt, ... at the top of the
        collection.  `streams` is accepted but not used.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Read file0.txt .. file<files_per_stream - 1>.txt of `collection` and compare to `data`.

        `collection` is the directory name below self.mounttmp.  `streams`
        is accepted but not used.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing itself is not inspected; the call is kept so the
        # profiled work still includes it.
        os.listdir(collection_dir)

        # Build each path exactly once.  Joining an already complete path
        # onto the mount directory a second time only works while the path
        # is absolute, so that indirection is avoided here.
        for j in range(0, files_per_stream):
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty collection that receives the moved file
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
              collection1.manifest_locator(), 'stream0', 'file0.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
356
357
def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
    """Rename file0.txt .. file<files_per_stream - 1>.txt from one directory to another.

    Source and destination are the `from_collection` and `to_collection`
    directories below `mounttmp`.  The `stream` argument is accepted but not
    used.  All renames happen inside one throwaway unittest.TestCase method
    decorated with @profiled.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            self.magicDirTest_moveAllFilesFromCollection()

        @profiled
        def magicDirTest_moveAllFilesFromCollection(self):
            for index in range(files_per_stream):
                old_path = os.path.join(mounttmp, from_collection, 'file'+str(index)+'.txt')
                new_path = os.path.join(mounttmp, to_collection, 'file'+str(index)+'.txt')
                os.rename(old_path, new_path)

    Test().runTest()
369
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another through a MagicDirectory mount.

    All renames are dispatched as a single self.pool.apply call (self.pool
    is not set up in this class; presumably MountTestBase provides it).
    Afterwards both collection records are fetched from the API and their
    manifest texts are checked.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Save and return a new collection with `files_per_stream` files containing `data`.

        Files are named file0.txt, file1.txt, ... at the top of the
        collection.  `streams`, `blocks_per_file` and `bytes_per_block` are
        accepted but not used.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty collection that receives the moved files
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files.  `data` is passed by keyword
        # because the helper's third positional parameter is
        # blocks_per_file, not data.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(
            streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
                  collection1.manifest_locator(), 'stream0', files_per_stream,))

        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
409
410
411 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files between two collections one self.pool.apply call at a time.

    The collections are reached through a MagicDirectory mount by their
    manifest locators; the outcome is verified against the manifest text of
    both collection records fetched from the API.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Save and return a new collection holding `files_per_stream` files of `data`.

        `streams` is accepted but not used.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for index in range(files_per_stream):
            name = "file"+str(index)+".txt"
            with new_collection.open(name, "w") as writer:
                writer.write(data)
        new_collection.save_new()
        return new_collection

    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Dispatch one move per file, from `from_collection` to `to_collection`."""
        for index in range(files_per_stream):
            move_args = (self.mounttmp, from_collection.manifest_locator(),
                         to_collection.manifest_locator(), 'stream0', 'file'+str(index)+'.txt',)
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # The destination starts out empty; the source holds all the files.
        destination = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        source = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move every file from the source into the destination
        self.magicDirTestMoveFiles_oneEachIntoAnother(source, destination, files_per_stream)

        expected_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # Nothing may be left behind in the source ...
        source_record = self.api.collections().get(uuid=source.manifest_locator()).execute()
        for name in expected_names:
            self.assertFalse(name in source_record['manifest_text'])

        # ... and everything must have arrived in the destination.
        destination_record = self.api.collections().get(uuid=destination.manifest_locator()).execute()
        for name in expected_names:
            self.assertTrue(name in destination_record['manifest_text'])
454
class FuseListLargeProjectContents(MountTestBase):
    """List a mounted project fixture that is expected to show 201 entries."""

    @profiled
    def getProjectWithManyCollections(self):
        """Return the listing of the mount root after checking its size and one known entry."""
        listing = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(listing))
        self.assertIn('Collection_1', listing)
        return listing

    @profiled
    def listContentsInProjectWithManyCollections(self, project_contents):
        """List the mount root again, then list every entry and expect 'baz' inside it.

        The `project_contents` argument is not consulted; a fresh listing of
        self.mounttmp is taken instead.
        """
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)

        for entry in entries:
            entry_path = os.path.join(self.mounttmp, entry)
            self.assertIn('baz', llfuse.listdir(entry_path))

    def test_listLargeProjectContents(self):
        project = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory, project_object=project)
        self.listContentsInProjectWithManyCollections(self.getProjectWithManyCollections())