6219: add back the test with files with multiple blocks that I lost somewhere along...
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
def fuse_CreateCollection(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Create stream directories under mounttmp and fill each one with files.

    For every index i below `streams` a directory "stream<i>" is created, and
    inside it `files_per_stream` files named "file<j>.txt" are written, each
    containing `data`.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))
                os.mkdir(stream_dir)

                # Write the payload into each file of this stream
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as out:
                        out.write(data)

    Test().runTest()
36
def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
    """Read back every file of every stream and check its contents.

    For each directory "stream<i>" under mounttmp (i below `streams`), the
    directory is listed and then each "file<j>.txt" (j below
    `files_per_stream`) is opened and compared against `data`.

    Raises AssertionError if any file's contents differ from `data`.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))
                # The listing result is not inspected; the call is retained so
                # the directory is still listed before its files are read.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index the file by j (file counter), not i (stream
                    # counter), so every file in the stream is actually read.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(data, f.read())

    Test().runTest()
47
def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Rename a file out of a stream directory into the top of mounttmp.

    The file ends up as "moved_from_<stream>_<filename>" directly under
    mounttmp. Directory listings before and after the rename are checked to
    confirm the file left the stream and appeared at the top level.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved_from_'+stream+'_'+filename

            # The file must be present before it is moved
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename), os.path.join(mounttmp, moved_name))

            # It now shows up at the top level under its new name ...
            self.assertIn(moved_name, llfuse.listdir(mounttmp))

            # ... and is gone from the stream it came from
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    Test().runTest()
63
def fuse_DeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove the file `filename` from the directory `stream` under mounttmp."""
    class Test(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    Test().runTest()
70
71 # Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
    """Profiled create/read/move/delete run against a writable collection mount.

    Every file is written with blocks_per_file * bytes_per_block bytes of data.
    Each step is wrapped in @profiled and executed through self.pool.
    """

    def setUp(self):
        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()

    @profiled
    def createCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
        """Create the stream directories and their files through the mount."""
        self.pool.apply(fuse_CreateCollection, (self.mounttmp, streams, files_per_stream, data,))

    @profiled
    def readContentsOfCollectionWithMultipleBlocks(self, streams, files_per_stream, data):
        """Read the files back through the mount and compare them to data."""
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

    @profiled
    def moveFileFromCollectionWithMultipleBlocks(self, streams):
        """Move file0.txt out of each stream directory."""
        for i in range(0, streams):
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

    @profiled
    def removeFileFromCollectionWithMultipleBlocks(self, streams):
        """Delete file1.txt from each stream directory."""
        for i in range(0, streams):
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 3
        blocks_per_file = 2
        bytes_per_block = 2**26

        data = 'x' * blocks_per_file * bytes_per_block

        # Names the moved files receive from
        # fuse_MoveFileFromCollectionWithManyFiles: one per source stream.
        moved_names = ['moved_from_stream' + str(i) + '_file0.txt' for i in range(0, streams)]

        self.createCollectionWithMultipleBlocks(streams, files_per_stream, data)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.readContentsOfCollectionWithMultipleBlocks(streams, files_per_stream, data)

        # Move file0.txt out of the streams into .
        self.moveFileFromCollectionWithMultipleBlocks(streams)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Check each stream's moved file individually; a bare 'file0.txt'
        # substring check would pass as soon as any one of them is present.
        for moved_name in moved_names:
            self.assertIn(moved_name, manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        self.removeFileFromCollectionWithMultipleBlocks(streams)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must still be in the first manifest line
        for moved_name in moved_names:
            self.assertIn(moved_name, manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
157
158 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Profiled create/read/move/delete run with many small files per stream.

    Each step is wrapped in @profiled and executed through self.pool against
    a writable collection mount.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def createCollectionWithManyFiles(self, streams, files_per_stream, data):
        """Create the stream directories and their files through the mount."""
        self.pool.apply(fuse_CreateCollection, (self.mounttmp, streams, files_per_stream, data,))

    @profiled
    def readContentsOfCollectionWithManyFiles(self, streams, files_per_stream, data):
        """Read the files back through the mount and compare them to data."""
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

    @profiled
    def moveFileFromCollectionWithManyFiles(self, streams):
        """Move file0.txt out of each stream directory."""
        for i in range(0, streams):
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

    @profiled
    def removeFileFromCollectionWithManyFiles(self, streams):
        """Delete file1.txt from each stream directory."""
        for i in range(0, streams):
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200

        data = 'x'

        # Names the moved files receive from
        # fuse_MoveFileFromCollectionWithManyFiles: one per source stream.
        moved_names = ['moved_from_stream' + str(i) + '_file0.txt' for i in range(0, streams)]

        self.createCollectionWithManyFiles(streams, files_per_stream, data)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        for i in range(0, streams):
            self.assertIn('./stream' + str(i), collection2["manifest_text"])

        for i in range(0, files_per_stream):
            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])

        # Read file contents
        self.readContentsOfCollectionWithManyFiles(streams, files_per_stream, data)

        # Move file0.txt out of the streams into .
        self.moveFileFromCollectionWithManyFiles(streams)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Check each stream's moved file individually; a bare 'file0.txt'
        # substring check would pass as soon as any one of them is present.
        for moved_name in moved_names:
            self.assertIn(moved_name, manifest_streams[0])

        for i in range(0, streams):
            self.assertNotIn('file0.txt', manifest_streams[i+1])

        for i in range(0, streams):
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])

        # Delete 'file1.txt' from all the streams
        self.removeFileFromCollectionWithManyFiles(streams)

        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()

        manifest_streams = collection2['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved files must still be in the first manifest line
        for moved_name in moved_names:
            self.assertIn(moved_name, manifest_streams[0])

        self.assertNotIn('file1.txt', collection2['manifest_text'])

        for i in range(0, streams):
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
242
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename `filename` from the collection1 directory into the collection2 directory.

    Both directories are looked up directly under mounttmp. The `stream`
    argument is accepted but not used: the file is addressed at the top level
    of each collection directory.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

    Test().runTest()
250
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete `filename` from the collection1 directory under mounttmp.

    The `stream` argument is accepted but not used: the file is addressed at
    the top level of the collection directory.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, collection1, filename)
            os.remove(target)

    Test().runTest()
257
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Profiled read/move/delete of files between two collections via a MagicDirectory mount."""

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Create and save a new collection holding `files_per_stream` files.

        Files are named "file<j>.txt" at the top level of the collection and
        each contains `data`. `streams` is accepted but not used here.
        Returns the saved collection object.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
        """Read every "file<j>.txt" of `collection` through the mount and compare to `data`.

        `collection` is the name of the collection's directory under the
        mount point. `streams` is accepted but not used here.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing result is not inspected; the call is retained so the
        # collection directory is still listed before its files are opened.
        os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            # Join the path once; re-joining an already complete path onto the
            # mount directory only works when the mount path is absolute.
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @profiled
    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
        """Move file0.txt between the two collections' mount directories, then refresh both."""
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
              to_collection.manifest_locator(), 'stream0', 'file0.txt',))
        from_collection.update()
        to_collection.update()

    @profiled
    def magicDirTest_removeFileFromCollection(self, collection):
        """Delete file1.txt from the collection's mount directory, then refresh it."""
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file1.txt',))
        collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Destination collection, created with the default (zero) file count
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)

        # Move file0.txt out of the collection2 into collection1
        self.magicDirTest_moveFileFromCollection(collection2, collection1)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.magicDirTest_removeFileFromCollection(collection2)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
322
323
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Profiled move of every file from one collection into another via a MagicDirectory mount.

    Both collections are refreshed once, after all files have been moved.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a new collection holding `files_per_stream` files.

        Files are named "file<j>.txt" at the top level of the collection and
        each contains `data`. `streams`, `blocks_per_file` and
        `bytes_per_block` are accepted but not used here.
        Returns the saved collection object.
        """
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveAllFiles_moveFilesFromCollection(self, from_collection, to_collection, files_per_stream):
        """Move file0.txt .. file<files_per_stream-1>.txt between the collections, then refresh both."""
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
        from_collection.update()
        to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Destination collection, created with the default (zero) file count
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files. `data` is passed by keyword:
        # the third positional parameter is blocks_per_file, not data.
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data=data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveAllFiles_moveFilesFromCollection(collection2, collection1, files_per_stream)

        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
370
371
372 # Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Profiled one-by-one move of files between collections via a MagicDirectory mount.

    Both collections are refreshed after every single file move.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
        """Build and save a collection of `files_per_stream` top-level files containing `data`.

        `streams` is accepted but not used here. Returns the saved collection.
        """
        new_collection = arvados.collection.Collection(api_client=self.api)
        for file_index in range(files_per_stream):
            file_name = "file" + str(file_index) + ".txt"
            with new_collection.open(file_name, "w") as writer:
                writer.write(data)
        new_collection.save_new()
        return new_collection

    @profiled
    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move each file across in turn, refreshing both collections after every move."""
        for file_index in range(files_per_stream):
            move_args = (
                self.mounttmp,
                from_collection.manifest_locator(),
                to_collection.manifest_locator(),
                'stream0',
                'file' + str(file_index) + '.txt',
            )
            self.pool.apply(magicDirTest_MoveFileFromCollection, move_args)
            from_collection.update()
            to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        data = 'x'

        # Destination collection, created with the default (zero) file count
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        # Source collection holding the files to be moved
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        file_names = []
        for i in range(files_per_stream):
            file_names.append("file%i.txt" % i)

        # None of the files may remain in the source collection
        source_record = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertFalse(name in source_record['manifest_text'])

        # All of them must be present in the destination collection
        destination_record = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertTrue(name in destination_record['manifest_text'])