6219: some more magic dir based testing
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
@profiled
def fuse_CreateCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
    """Populate the mount with stream directories full of small text files.

    Creates ``streams`` directories named ``stream<i>`` directly under
    ``mounttmp`` and writes ``files_per_stream`` files named ``file<j>.txt``
    into each of them, every file containing ``data``.

    ``blocks_per_file`` and ``bytes_per_block`` are accepted but not read by
    this function.
    """
    class _PopulateMount(unittest.TestCase):
        def runTest(self):
            for stream_no in range(0, streams):
                stream_name = "./stream" + str(stream_no)
                os.mkdir(os.path.join(mounttmp, stream_name))

                # Fill the directory that was just created.
                for file_no in range(0, files_per_stream):
                    file_name = "file" + str(file_no) + ".txt"
                    target = os.path.join(mounttmp, stream_name, file_name)
                    with open(target, "w") as out:
                        out.write(data)

    populate = _PopulateMount()
    populate.runTest()
37
@profiled
def fuse_ReadContentsFromCollectionWithManyFiles(mounttmp, streams, files_per_stream, content):
    """Read back every file in every stream and verify its contents.

    For each ``stream<i>`` directory under ``mounttmp`` (``i`` in
    ``range(streams)``) the directory is listed, then each ``file<j>.txt``
    (``j`` in ``range(files_per_stream)``) is opened and its full contents are
    compared with ``content``.

    Raises AssertionError (via ``assertEqual``) on the first mismatch.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream' + str(i))
                # The listing result is not needed; the call is kept so the
                # directory is still listed once per stream as before.
                llfuse.listdir(stream_dir)
                for j in range(0, files_per_stream):
                    # Index the file by j (the file counter), not i (the
                    # stream counter); using i re-read one file repeatedly
                    # and never touched the remaining files.
                    with open(os.path.join(stream_dir, 'file' + str(j) + '.txt')) as f:
                        self.assertEqual(content, f.read())

    Test().runTest()
49
@profiled
def fuse_MoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move ``filename`` from the ``stream`` directory up to the mount root.

    Checks that the file is listed in ``mounttmp/stream`` beforehand, renames
    it to ``mounttmp/filename``, then checks that it is listed in the mount
    root and no longer listed in the stream directory.
    """
    class _MoveToRoot(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            root_dir = os.path.join(mounttmp)

            # Precondition: the file lives in the stream directory.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            source = os.path.join(stream_dir, filename)
            destination = os.path.join(mounttmp, filename)
            os.rename(source, destination)

            # Postconditions: present at the root, gone from the stream.
            self.assertIn(filename, llfuse.listdir(root_dir))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    mover = _MoveToRoot()
    mover.runTest()
66
@profiled
def fuse_DeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Remove ``mounttmp/stream/filename`` through the mounted filesystem."""
    class _DeleteFromStream(unittest.TestCase):
        def runTest(self):
            doomed = os.path.join(mounttmp, stream, filename)
            os.remove(doomed)

    deleter = _DeleteFromStream()
    deleter.runTest()
74
# Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create many files through a collection mount, then move and delete some.

    After each phase the collection record is fetched again through the API
    and its manifest text is inspected.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        # Start from a freshly saved, empty collection and mount it.
        new_collection = arvados.collection.Collection(api_client=self.api)
        new_collection.save_new()

        mount = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            mount.new_collection(new_collection.api_response(), new_collection)
        self.assertTrue(mount.writable())

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        create_args = (self.mounttmp, streams, files_per_stream,
                       blocks_per_file, bytes_per_block, data)
        self.pool.apply(fuse_CreateCollectionWithManyFiles, create_args)

        # Phase 1: every stream and every file name appears in the manifest.
        record = self.api.collections().get(uuid=new_collection.manifest_locator()).execute()

        for stream_no in range(0, streams):
            self.assertIn('./stream' + str(stream_no), record["manifest_text"])

        for file_no in range(0, files_per_stream):
            self.assertIn('file' + str(file_no) + '.txt', record["manifest_text"])

        # Read file contents
        read_args = (self.mounttmp, streams, files_per_stream, data,)
        self.pool.apply(fuse_ReadContentsFromCollectionWithManyFiles, read_args)

        # Phase 2: move file0.txt out of the streams into .
        for stream_no in range(0, streams):
            move_args = (self.mounttmp, 'stream'+str(stream_no), 'file0.txt',)
            self.pool.apply(fuse_MoveFileFromCollectionWithManyFiles, move_args)

        record = self.api.collections().get(uuid=new_collection.manifest_locator()).execute()

        # The test expects the newline split to yield four pieces, with the
        # root stream first and the per-stream lines at indexes 1..streams.
        manifest_lines = record['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_lines))

        for _ in range(0, streams):
            self.assertIn('file0.txt', manifest_lines[0])

        for stream_no in range(0, streams):
            self.assertNotIn('file0.txt', manifest_lines[stream_no+1])

        for stream_no in range(0, streams):
            stream_line = manifest_lines[stream_no+1]
            for file_no in range(1, files_per_stream):
                self.assertIn('file' + str(file_no) + '.txt', stream_line)

        # Phase 3: delete 'file1.txt' from all the streams
        for stream_no in range(0, streams):
            delete_args = (self.mounttmp, 'stream'+str(stream_no), 'file1.txt')
            self.pool.apply(fuse_DeleteFileFromCollectionWithManyFiles, delete_args)

        record = self.api.collections().get(uuid=new_collection.manifest_locator()).execute()

        manifest_lines = record['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_lines))

        for _ in range(0, streams):
            self.assertIn('file0.txt', manifest_lines[0])

        self.assertNotIn('file1.txt', record['manifest_text'])

        for stream_no in range(0, streams):
            stream_line = manifest_lines[stream_no+1]
            for file_no in range(2, files_per_stream):
                self.assertIn('file' + str(file_no) + '.txt', stream_line)
146
def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
    """Rename ``filename`` from ``collection1`` into ``collection2``.

    Both collections are addressed as directories directly under ``mounttmp``
    and the file is moved between their top levels.  ``stream`` is accepted
    but not used: the paths do not include a stream component.
    """
    class _MoveBetweenCollections(unittest.TestCase):
        def runTest(self):
            source = os.path.join(mounttmp, collection1, filename)
            destination = os.path.join(mounttmp, collection2, filename)
            os.rename(source, destination)

    mover = _MoveBetweenCollections()
    mover.runTest()
154
def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
    """Delete ``filename`` from the top level of ``collection1`` under the mount.

    ``stream`` is accepted but not used: the path does not include a stream
    component.
    """
    class _RemoveFromCollection(unittest.TestCase):
        def runTest(self):
            doomed = os.path.join(mounttmp, collection1, filename)
            os.remove(doomed)

    remover = _RemoveFromCollection()
    remover.runTest()
161
162
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Move and delete single files between collections via a MagicDirectory mount.

    Collections are created through the SDK, then accessed through the mount
    at ``<mounttmp>/<manifest_locator>``.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    @profiled
    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a collection holding ``files_per_stream`` files.

        Files are named ``file<j>.txt`` and each contains ``data``.  With the
        default ``files_per_stream=0`` no files are written before saving.
        ``streams``, ``blocks_per_file`` and ``bytes_per_block`` are accepted
        but not read here.

        Returns the saved collection object.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1,
            blocks_per_file=1, bytes_per_block=1, data='x'):
        """Read every ``file<j>.txt`` of ``collection`` through the mount.

        ``collection`` is the directory name under ``self.mounttmp`` (the
        caller passes the manifest locator).  Each file's contents must equal
        ``data``.  ``streams``, ``blocks_per_file`` and ``bytes_per_block``
        are accepted but not read here.
        """
        collection_dir = os.path.join(self.mounttmp, collection)

        # The listing result is not needed; the call is kept so the directory
        # is still listed once before the files are read.
        os.listdir(collection_dir)

        for j in range(0, files_per_stream):
            # Open the path directly.  Previously a complete path was joined
            # onto mounttmp/collection a second time, which only resolved
            # correctly when mounttmp was absolute.
            with open(os.path.join(collection_dir, 'file'+str(j)+'.txt')) as f:
                self.assertEqual(data, f.read())

    @profiled
    def magicDirTest_moveFileFromCollection(self, from_collection, to_collection):
        """Move ``file0.txt`` from one collection to the other via the mount.

        The rename runs through ``self.pool``; afterwards ``update()`` is
        called on both collection objects.
        """
        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
              to_collection.manifest_locator(), 'stream0', 'file0.txt',))
        from_collection.update()
        to_collection.update()

    @profiled
    def magicDirTest_removeFileFromCollection(self, collection):
        """Remove ``file1.txt`` from ``collection`` via the mount, then ``update()`` it."""
        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection.manifest_locator(), 'stream0', 'file1.txt',))
        collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # Destination collection, created with the defaults (no files).
        collection1 = self.magicDirTest_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream,
                          blocks_per_file, bytes_per_block, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams,
            files_per_stream, blocks_per_file, bytes_per_block, data)

        # Move file0.txt out of the collection2 into collection1
        self.magicDirTest_moveFileFromCollection(collection2, collection1)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file0.txt', updated_collection['manifest_text'])
        self.assertIn('file1.txt', updated_collection['manifest_text'])

        # Delete file1.txt from collection2
        self.magicDirTest_removeFileFromCollection(collection2)
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        self.assertNotIn('file1.txt', updated_collection['manifest_text'])
        self.assertIn('file2.txt', updated_collection['manifest_text'])
234
235
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
    """Move every file from one collection into another via a MagicDirectory mount.

    ``update()`` is called on the collection objects once, after all the
    moves have been issued.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a collection holding ``files_per_stream`` files.

        Files are named ``file<j>.txt`` and each contains ``data``.  With the
        default ``files_per_stream=0`` no files are written before saving.
        ``streams``, ``blocks_per_file`` and ``bytes_per_block`` are accepted
        but not read here.

        Returns the saved collection object.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveAllFiles_moveFilesFromCollection(self, from_collection, to_collection, files_per_stream):
        """Move ``file0.txt`` .. ``file<files_per_stream-1>.txt`` between collections.

        Each rename runs through ``self.pool``; ``update()`` is called on both
        collection objects once after the last move.
        """
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
        from_collection.update()
        to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # Destination collection, created with the defaults (no files).
        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream,
                          blocks_per_file, bytes_per_block, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveAllFiles_moveFilesFromCollection(collection2, collection1, files_per_stream)

        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # The source manifest must no longer mention any of the files...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ...and the destination manifest must mention all of them.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])
286
287
# Move one file at a time from one collection into another
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
    """Move files one by one between collections via a MagicDirectory mount.

    ``update()`` is called on both collection objects after every single
    move, rather than once at the end.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()

    @profiled
    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
            blocks_per_file=0, bytes_per_block=0, data='x'):
        """Create and save a collection holding ``files_per_stream`` files.

        Files are named ``file<j>.txt`` and each contains ``data``.  With the
        default ``files_per_stream=0`` no files are written before saving.
        ``streams``, ``blocks_per_file`` and ``bytes_per_block`` are accepted
        but not read here.

        Returns the saved collection object.
        """
        # Create collection
        collection = arvados.collection.Collection(api_client=self.api)
        for j in range(0, files_per_stream):
            with collection.open("file"+str(j)+".txt", "w") as f:
                f.write(data)
        collection.save_new()
        return collection

    @profiled
    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
        """Move ``file0.txt`` .. ``file<files_per_stream-1>.txt`` one at a time.

        Each rename runs through ``self.pool`` and is followed immediately by
        ``update()`` on both collection objects.
        """
        for j in range(0, files_per_stream):
            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
            from_collection.update()
            to_collection.update()

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # Destination collection, created with the defaults (no files).
        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
        # Create collection with multiple files
        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream,
                          blocks_per_file, bytes_per_block, data)

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        # Move all files from collection2 into collection1
        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)

        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]

        # The source manifest must no longer mention any of the files...
        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
        for name in file_names:
            self.assertNotIn(name, updated_collection['manifest_text'])

        # ...and the destination manifest must mention all of them.
        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
        for name in file_names:
            self.assertIn(name, updated_collection['manifest_text'])