6219: magic dir test in progress
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
@profiled
def fuseCreateCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
    """Create stream directories full of small files under a mount point.

    For each index i in range(streams) a directory "stream<i>" is created
    under mounttmp, and files "file0.txt" .. "file<files_per_stream-1>.txt"
    are written into it, each containing `data`.

    mounttmp: path of the mounted directory to write into.
    streams: number of stream directories to create.
    files_per_stream: number of files written into each stream directory.
    blocks_per_file, bytes_per_block: accepted for signature compatibility
        with callers; this function does not read them (callers build
        `data` from them before calling).
    data: the text written into every file.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(streams):
                # Build the stream directory path once and reuse it for
                # both the mkdir and the files created inside it.
                stream_dir = os.path.join(mounttmp, "./stream" + str(i))
                os.mkdir(stream_dir)

                # Create files
                for j in range(files_per_stream):
                    with open(os.path.join(stream_dir, "file" + str(j) + ".txt"), "w") as f:
                        f.write(data)

    Test().runTest()
39
@profiled
def fuseReadContentsFromCollectionWithManyFiles(mounttmp, streams, files_per_stream, content):
    """Read every file in every stream directory and check its contents.

    mounttmp: path of the mounted directory to read from.
    streams: number of "stream<i>" directories to visit.
    files_per_stream: number of "file<j>.txt" files to read in each stream.
    content: the text each file is expected to contain.

    Raises AssertionError (via unittest assertions) when a file's contents
    differ from `content`.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(streams):
                stream_dir = os.path.join(mounttmp, 'stream' + str(i))
                # The listing is not inspected; the call is kept so the
                # work done by this profiled function still includes a
                # directory listing per stream.
                llfuse.listdir(stream_dir)
                for j in range(files_per_stream):
                    # Index the file name by j (the file counter), not i
                    # (the stream counter), so that every file is read
                    # rather than the same file files_per_stream times.
                    with open(os.path.join(stream_dir, 'file' + str(j) + '.txt')) as f:
                        self.assertEqual(content, f.read())

    Test().runTest()
51
@profiled
def fuseMoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move one file from a stream directory up to the mount root.

    Verifies that `filename` is listed in the stream directory beforehand,
    renames it to the top level of mounttmp, then verifies it is listed at
    the top level and no longer listed in the stream directory.

    mounttmp: path of the mounted directory.
    stream: name of the stream directory (relative to mounttmp).
    filename: name of the file to move.
    """
    class MoveCheck(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            source = os.path.join(mounttmp, stream, filename)
            destination = os.path.join(mounttmp, filename)

            # The file must be present in the stream before the move.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(source, destination)

            # After the move it appears at the root and is gone from the stream.
            self.assertIn(filename, llfuse.listdir(os.path.join(mounttmp)))
            self.assertNotIn(filename, llfuse.listdir(stream_dir))

    case = MoveCheck()
    case.runTest()
68
@profiled
def fuseDeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete one file from a stream directory under the mount point.

    mounttmp: path of the mounted directory.
    stream: name of the stream directory (relative to mounttmp).
    filename: name of the file to remove.
    """
    class DeleteCase(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, stream, filename)
            os.remove(target)

    case = DeleteCase()
    case.runTest()
76
77 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete files through a writable collection mount.

    The test populates a new collection with two streams of 200 files each
    via the mounted directory, and re-fetches the collection's manifest
    from the API after each step to check the result.
    """

    def setUp(self):
        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def _manifest_text(self, collection):
        """Fetch the collection record from the API and return its manifest text."""
        record = self.api.collections().get(uuid=collection.manifest_locator()).execute()
        return record["manifest_text"]

    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        m = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            m.new_collection(collection.api_response(), collection)
        self.assertTrue(m.writable())

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        self.pool.apply(fuseCreateCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, blocks_per_file, bytes_per_block, data))

        manifest_text = self._manifest_text(collection)

        for i in range(streams):
            self.assertIn('./stream' + str(i), manifest_text)

        for i in range(files_per_stream):
            self.assertIn('file' + str(i) + '.txt', manifest_text)

        # Read file contents
        self.pool.apply(fuseReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for i in range(streams):
            self.pool.apply(fuseMoveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))

        manifest_streams = self._manifest_text(collection).split('\n')
        # Entry 0 is checked for the moved file and entries 1..streams for
        # the stream contents; the fourth entry is presumably the empty
        # string after the trailing newline -- TODO confirm.
        self.assertEqual(4, len(manifest_streams))

        # This check does not depend on the stream index, so it runs once.
        self.assertIn('file0.txt', manifest_streams[0])

        for i in range(streams):
            stream_line = manifest_streams[i+1]
            self.assertNotIn('file0.txt', stream_line)
            for j in range(1, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', stream_line)

        # Delete 'file1.txt' from all the streams
        for i in range(streams):
            self.pool.apply(fuseDeleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))

        manifest_text = self._manifest_text(collection)
        manifest_streams = manifest_text.split('\n')
        self.assertEqual(4, len(manifest_streams))

        # The previously moved file must still be in the first entry.
        self.assertIn('file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', manifest_text)

        for i in range(streams):
            stream_line = manifest_streams[i+1]
            for j in range(2, files_per_stream):
                self.assertIn('file' + str(j) + '.txt', stream_line)
147
148
@profiled
def magicDirTest_MoveFileFromCollectionWithManyFiles(mounttmp, collection1, collection2, stream, filename):
    """Placeholder for moving a file between two collections in a magic dir.

    The move is not implemented yet: the function currently only prints
    'TBD'. None of the arguments are used.
    """
    class PendingMove(unittest.TestCase):
        def runTest(self):
            # Intended operation, still disabled:
            # os.rename(os.path.join(mounttmp, collection1, stream, filename),
            #           os.path.join(mounttmp, collection2, stream, filename))
            print('TBD')

    case = PendingMove()
    case.runTest()
157
class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Read (and eventually move) files of collections through a MagicDirectory mount.

    Collections are written with arvados.CollectionWriter and registered
    through the API, then accessed by identifier under the mount point.
    """

    def setUp(self):
        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()

    def magicDirTest_createCollectionWithManyFiles(self, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
        """Write and register a collection with `streams` streams of `files_per_stream` files.

        Every file contains `data`. blocks_per_file and bytes_per_block are
        accepted but not read by this method.

        Returns the value of CollectionWriter.finish(), which is also stored
        in self.testcollection (overwritten on every call).
        """
        # Create collection
        cw = arvados.CollectionWriter()
        for i in range(streams):
            cw.start_new_stream('./stream' + str(i))
            for j in range(files_per_stream):
                cw.start_new_file('file' + str(j) + '.txt')
                cw.write(data)

        self.testcollection = cw.finish()
        self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
        return self.testcollection

    @profiled
    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
        """Check the directory layout and file contents of `collection` under the mount.

        collection: directory name of the collection under the mount point
            (the value returned by magicDirTest_createCollectionWithManyFiles).
        streams, files_per_stream: expected layout of the collection.
        blocks_per_file, bytes_per_block: accepted but not read here.
        data: expected contents of every file.
        """
        stream_names = ["stream%i" % i for i in range(streams)]

        # Access the collection directly and through by_id before listing
        # the mount root.
        self.assertDirContents(collection, stream_names)
        self.assertDirContents(os.path.join('by_id', collection), stream_names)

        mount_ls = llfuse.listdir(self.mounttmp)
        self.assertIn('README', mount_ls)
        self.assertIn(collection, mount_ls)
        self.assertIn(collection,
                      llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))

        # Build each path once, already rooted at the mount point, and read
        # it directly instead of joining it with the mount point again.
        for i in range(streams):
            for j in range(files_per_stream):
                path = os.path.join(self.mounttmp, collection, 'stream'+str(i)+'/file'+str(j)+'.txt')
                with open(path) as f:
                    self.assertEqual(data, f.read())

    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        # collection1 is the large collection under test; collection2 is
        # created with the defaults (one stream, one file).
        collection1 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, blocks_per_file, bytes_per_block, data)
        collection2 = self.magicDirTest_createCollectionWithManyFiles()

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        self.magicDirTest_readCollectionContents(collection1, streams, files_per_stream, blocks_per_file, bytes_per_block, data)

        # Move stream0/file1.txt from collection1 to collection2
        # (the helper invoked here is still a placeholder).
        self.pool.apply(magicDirTest_MoveFileFromCollectionWithManyFiles, (self.mounttmp, collection1, collection2, 'stream0', 'file1.txt',))
214         # Move file0.txt out of the streams into .
215         self.pool.apply(magicDirTest_MoveFileFromCollectionWithManyFiles, (self.mounttmp, collection1, collection2, 'stream0', 'file1.txt',))