6219: add test to list contents of a project with many collections.
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados_fuse as fuse
3 import llfuse
4 import logging
5 import os
6 import sys
7 import unittest
8 from .. import run_test_server
9 from ..mount_test_base import MountTestBase
10
11 logger = logging.getLogger('arvados.arv-mount')
12
13 from performance_profiler import profiled
14
def fuse_CreateCollection(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create stream directories full of identical files under a mount point.

    For every stream index ``i`` in ``range(streams)`` a directory
    ``./stream<i>`` is created below ``mounttmp``; it then receives
    ``files_per_stream`` files named ``file<j>.txt``, each containing ``data``.

    The work is wrapped in a throwaway ``unittest.TestCase`` whose
    ``runTest`` is invoked directly, the same shape used by the other
    module-level helpers in this file.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for stream_no in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_no))
                os.mkdir(stream_dir)

                # Fill the freshly created stream with its files.
                for file_no in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_no) + ".txt")
                    with open(file_path, "w") as f:
                        f.write(data)

    Test().runTest()
27
def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """List every stream directory and verify the content of each of its files.

    For each stream index ``i`` in ``range(streams)`` the directory
    ``stream<i>`` under ``mounttmp`` is listed, then every file
    ``file<j>.txt`` for ``j`` in ``range(files_per_stream)`` is opened and its
    full content is compared against ``data``.

    Raises ``AssertionError`` (via ``assertEqual``) when a file's content
    differs from ``data``.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing is performed for its own sake; the returned
                # names are not inspected.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # The file name must use the file index j. Using the
                    # stream index here would re-read a single file per stream
                    # and leave all the others unverified.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
38
def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Rename ``<stream>/<filename>`` to ``moved_from_<stream>_<filename>`` at the mount root.

    The directory listings are checked before and after the rename: the file
    must be present in the stream beforehand, the new name must show up at the
    top level of ``mounttmp`` afterwards, and the old name must be gone from
    the stream.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # Precondition: the file is still where we expect it.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename),
                      os.path.join(mounttmp, moved_name))

            # Postconditions: visible at the root, absent from the stream.
            self.assertIn(moved_name, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
54
def fuse_DeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove ``<mounttmp>/<stream>/<filename>`` with ``os.remove``.

    No listing checks are made here; any error raised by ``os.remove``
    propagates to the caller.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            doomed_path = os.path.join(mounttmp, stream, filename)
            os.remove(doomed_path)

    Test().runTest()
61
# Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete large files through a writable collection mount.

    Each step is a separate ``@profiled`` helper that dispatches one of the
    module-level ``fuse_*`` functions through ``self.pool.apply``; the test
    method inspects the collection's manifest text between steps.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    @profiled
    def createCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
        """Create ``streams`` directories of ``files_per_stream`` files holding ``data``."""
        worker_args = (self.mounttmp, streams, files_per_stream, data)
        self.pool.apply(fuse_CreateCollection, worker_args)

    @profiled
    def readContentsOfCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
        """Read the files back through the mount and compare them with ``data``."""
        worker_args = (self.mounttmp, streams, files_per_stream, data)
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, worker_args)

    @profiled
    def moveFileFromCollectionWithMultipleBlocks(self, streams):
        """Move ``file0.txt`` out of every stream, one stream at a time."""
        for stream_name in ['stream%d' % n for n in range(streams)]:
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles,
                            (self.mounttmp, stream_name, 'file0.txt'))

    @profiled
    def removeFileFromCollectionWithMultipleBlocks(self, streams):
        """Delete ``file1.txt`` from every stream, one stream at a time."""
        for stream_name in ['stream%d' % n for n in range(streams)]:
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles,
                            (self.mounttmp, stream_name, 'file1.txt'))

    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        # Start from an empty, saved collection and mount it writable.
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        mount = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            mount.new_collection(collection.api_response(), collection)
        self.assertTrue(mount.writable())

        streams, files_per_stream = 2, 3
        blocks_per_file, bytes_per_block = 2, 2**26
        data = 'x' * blocks_per_file * bytes_per_block

        def stored_manifest():
            # Re-fetch the collection record through the API client and
            # return its manifest text.
            record = self.api.collections().get(uuid=collection.manifest_locator()).execute()
            return record["manifest_text"]

        self.createCollectionWithMultipleBlocks(streams, files_per_stream, data)

        manifest_text = stored_manifest()
        for n in range(streams):
            self.assertIn('./stream%d' % n, manifest_text)
        for n in range(files_per_stream):
            self.assertIn('file%d.txt' % n, manifest_text)

        # Read file contents
        self.readContentsOfCollectionWithMultipleBlocks(streams, files_per_stream, data)

        # Move file0.txt out of the streams into .
        self.moveFileFromCollectionWithMultipleBlocks(streams)

        manifest_streams = stored_manifest().split('\n')
        # Presumably: the root stream, one line per stream directory, and the
        # empty string after the trailing newline.
        self.assertEqual(4, len(manifest_streams))
        stream_lines = manifest_streams[1:streams + 1]

        # The moved files keep 'file0.txt' as a suffix of their new name.
        self.assertIn('file0.txt', manifest_streams[0])
        for line in stream_lines:
            self.assertNotIn('file0.txt', line)
        for line in stream_lines:
            for n in range(1, files_per_stream):
                self.assertIn('file%d.txt' % n, line)

        # Delete 'file1.txt' from all the streams
        self.removeFileFromCollectionWithMultipleBlocks(streams)

        manifest_text = stored_manifest()
        manifest_streams = manifest_text.split('\n')
        self.assertEqual(4, len(manifest_streams))

        self.assertIn('file0.txt', manifest_streams[0])
        self.assertNotIn('file1.txt', manifest_text)

        for line in manifest_streams[1:streams + 1]:
            for n in range(2, files_per_stream):
                self.assertIn('file%d.txt' % n, line)
148
# Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete many one-byte files through a writable collection mount.

    Each step is a separate ``@profiled`` helper that dispatches one of the
    module-level ``fuse_*`` functions through ``self.pool.apply``; the test
    method inspects the collection's manifest text between steps.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def createCollectionWithManyFiles(self, streams, files_per_stream, data):
        """Create ``streams`` directories of ``files_per_stream`` files holding ``data``."""
        job = (self.mounttmp, streams, files_per_stream, data)
        self.pool.apply(fuse_CreateCollection, job)

    @profiled
    def readContentsOfCollectionWithManyFiles(self, streams, files_per_stream, data):
        """Read the files back through the mount and compare them with ``data``."""
        job = (self.mounttmp, streams, files_per_stream, data)
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, job)

    @profiled
    def moveFileFromCollectionWithManyFiles(self, streams):
        """Move ``file0.txt`` out of every stream, one stream at a time."""
        for index in range(streams):
            job = (self.mounttmp, 'stream' + str(index), 'file0.txt')
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, job)

    @profiled
    def removeFileFromCollectionWithManyFiles(self, streams):
        """Delete ``file1.txt`` from every stream, one stream at a time."""
        for index in range(streams):
            job = (self.mounttmp, 'stream' + str(index), 'file1.txt')
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, job)

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        # Start from an empty, saved collection and mount it writable.
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        mount = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            mount.new_collection(collection.api_response(), collection)
        self.assertTrue(mount.writable())

        streams = 2
        files_per_stream = 200
        data = 'x'

        def fetch_manifest_text():
            # Ask the API server for the current record of the collection.
            response = self.api.collections().get(uuid=collection.manifest_locator()).execute()
            return response["manifest_text"]

        self.createCollectionWithManyFiles(streams, files_per_stream, data)

        text = fetch_manifest_text()
        for index in range(streams):
            self.assertIn('./stream' + str(index), text)
        for index in range(files_per_stream):
            self.assertIn('file' + str(index) + '.txt', text)

        # Read file contents
        self.readContentsOfCollectionWithManyFiles(streams, files_per_stream, data)

        # Move file0.txt out of the streams into .
        self.moveFileFromCollectionWithManyFiles(streams)

        lines = fetch_manifest_text().split('\n')
        # Presumably: the root stream, one line per stream directory, and the
        # empty string after the trailing newline.
        self.assertEqual(4, len(lines))
        per_stream = lines[1:streams + 1]

        # The moved files keep 'file0.txt' as a suffix of their new name.
        self.assertIn('file0.txt', lines[0])
        for entry in per_stream:
            self.assertNotIn('file0.txt', entry)
        for entry in per_stream:
            for index in range(1, files_per_stream):
                self.assertIn('file' + str(index) + '.txt', entry)

        # Delete 'file1.txt' from all the streams
        self.removeFileFromCollectionWithManyFiles(streams)

        text = fetch_manifest_text()
        lines = text.split('\n')
        self.assertEqual(4, len(lines))

        self.assertIn('file0.txt', lines[0])
        self.assertNotIn('file1.txt', text)

        for entry in lines[1:streams + 1]:
            for index in range(2, files_per_stream):
                self.assertIn('file' + str(index) + '.txt', entry)
233
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename ``<collection1>/<filename>`` to ``<collection2>/<filename>`` under ``mounttmp``.

    ``stream`` is accepted but not used: the file is addressed directly
    inside each collection directory.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

    Test().runTest()
241
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Remove ``<mounttmp>/<collection1>/<filename>`` with ``os.remove``.

    ``stream`` is accepted but not used when building the path.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            victim = os.path.join(mounttmp, collection1, filename)
            os.remove(victim)

    Test().runTest()
248
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Read, move and delete files of a collection reached through a ``fuse.MagicDirectory`` mount.

    Collections are built with the SDK (``arvados.collection.Collection``)
    before the mount is made; files are then addressed below
    ``self.mounttmp`` by the collection's ``manifest_locator()``.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Create and save a collection holding ``files_per_stream`` files of ``data``.

        Files are named ``file<j>.txt`` and written at the top level of the
        collection; ``streams`` is accepted but not used. With the default
        ``files_per_stream=0`` an empty collection is saved.

        Returns the saved collection object.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """List the collection directory and verify the content of each file.

        ``collection`` is the directory name under ``self.mounttmp``;
        ``streams`` is accepted but not used.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing is performed for its own sake; the returned names are
        # not inspected.
        os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            # Build each path exactly once. Joining an already joined path
            # onto the mount point again only works while mounttmp is absolute.
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @profiled
    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
        """Move ``file0.txt`` between the two collections, then refresh both objects."""
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
              to_collection.manifest_locator(), 'stream0', 'file0.txt',))
        from_collection.update()
        to_collection.update()

    @profiled
    def magicDirTest_removeFileFromCollection(self, collection):
        """Delete ``file1.txt`` from the collection, then refresh the object."""
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file1.txt',))
        collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty collection that will receive the moved file.
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.magicDirTest_moveFileFromCollection(collection2, collection1)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        # assertIn/assertNotIn report the searched text on failure, unlike
        # assertTrue/assertFalse on a precomputed boolean.
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.magicDirTest_removeFileFromCollection(collection2)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
313
314
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file of one collection into another through a ``fuse.MagicDirectory`` mount.

    The collection objects are refreshed once, after all the moves are done.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a collection holding ``files_per_stream`` files of ``data``.

        Files are named ``file<j>.txt`` and written at the top level of the
        collection. ``streams``, ``blocks_per_file`` and ``bytes_per_block``
        are accepted but not used. With the default ``files_per_stream=0`` an
        empty collection is saved.

        Returns the saved collection object.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveAllFiles_moveFilesFromCollection(self, from_collection, to_collection, files_per_stream):
        """Move ``file0.txt`` .. ``file<files_per_stream-1>.txt`` and refresh both collections once at the end."""
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
        from_collection.update()
        to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty collection that will receive the files.
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files. `data` has to be passed by
        # keyword: the third positional slot of the helper is blocks_per_file,
        # so a positional `data` would be ignored in favour of the default.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(
            streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveAllFiles_moveFilesFromCollection(collection2, collection1, files_per_stream)

        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # Nothing may be left behind in the source collection ...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ... and everything must have arrived in the destination.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
361
362
# Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files between two collections through a ``fuse.MagicDirectory`` mount.

    Both collection objects are refreshed after every single move.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Create and save a collection with ``files_per_stream`` top-level files of ``data``.

        ``streams`` is accepted but not used. Returns the saved collection.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for file_name in ["file" + str(n) + ".txt" for n in range(files_per_stream)]:
            with new_collection.open(file_name, "w") as out:
                out.write(data)
        new_collection.save_new()
        return new_collection

    @profiled
    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move the files one by one, calling ``update()`` on both collections after each move."""
        for n in range(files_per_stream):
            move_args = (
                self.mounttmp,
                from_collection.manifest_locator(),
                to_collection.manifest_locator(),
                'stream0',
                'file' + str(n) + '.txt',
            )
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)
            from_collection.update()
            to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Empty destination first, then the populated source collection.
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        source_record = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertFalse(name in source_record['manifest_text'])

        destination_record = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertTrue(name in destination_record['manifest_text'])
409
class FuseListLargeProjectContents(MountTestBase):
    """List a project mount and every directory directly below it."""

    @profiled
    def listLargeProjectContents(self):
        """List the mount root, check its size, then list each entry.

        Expects exactly 201 entries including ``Collection_1``, and expects
        every entry to contain an item named ``baz``.
        """
        entries = llfuse.listdir(self.mounttmp)
        self.assertEqual(201, len(entries))
        self.assertIn('Collection_1', entries)

        for entry in entries:
            entry_path = os.path.join(self.mounttmp, entry)
            self.assertIn('baz', llfuse.listdir(entry_path))

    def test_listLargeProjectContents(self):
        # Mount the fixture project, then run the profiled listing.
        project = run_test_server.fixture('groups')['project_with_201_collections']
        self.make_mount(fuse.ProjectDirectory,
                        project_object=project)
        self.listLargeProjectContents()