6219: add test to create collection and read contents using magic dir.
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
def fuseCreateCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
    """Populate a mounted directory with stream directories full of files.

    For each index i in range(streams) this creates the directory
    "stream<i>" under mounttmp, writes files "file0.txt" ..
    "file<files_per_stream - 1>.txt" into it, each containing `data`, and
    asserts that listing the directory returns exactly those names.

    Along the way it asserts that writing into the directory before it
    exists raises IOError and that creating it a second time raises OSError.

    blocks_per_file and bytes_per_block are accepted but not used by this
    function; the caller is expected to have sized `data` already.

    Any failed check raises AssertionError (via unittest.TestCase).
    """
    class Test(unittest.TestCase):
        def runTest(self):
            # Built directly from the range so that files_per_stream == 0
            # yields an empty expectation instead of ['file0.txt'].
            file_names = ['file' + str(j) + '.txt' for j in range(files_per_stream)]

            for i in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(i))

                # The stream directory has not been created yet, so opening
                # a file inside it for writing must fail.
                with self.assertRaises(IOError):
                    with open(os.path.join(stream_dir, "file0.txt"), "w") as f:
                        f.write(data)

                os.mkdir(stream_dir)

                # Creating the same directory twice must be rejected.
                with self.assertRaises(OSError):
                    os.mkdir(stream_dir)

                # Create files
                for name in file_names:
                    with open(os.path.join(stream_dir, name), "w") as f:
                        f.write(data)

                listing = llfuse.listdir(stream_dir)
                self.assertEqual(sorted(file_names), sorted(listing))

    Test().runTest()
51
def fuseReadContentsFromCollectionWithManyFiles(mounttmp, streams, files_per_stream, content):
    """Read back every file in every stream directory and check its content.

    For each i in range(streams) and j in range(files_per_stream), opens
    "<mounttmp>/stream<i>/file<j>.txt" and asserts that its full content
    equals `content`.

    Raises AssertionError on a content mismatch; errors from open() (for
    example a missing file) propagate unchanged.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(streams):
                stream_dir = os.path.join(mounttmp, 'stream' + str(i))

                # The listing itself is not checked here; the call is kept
                # from the original code.
                # NOTE(review): presumably it exercises directory listing
                # before the reads -- confirm whether it is still wanted.
                llfuse.listdir(stream_dir)

                for j in range(files_per_stream):
                    # Index by the file counter j (not the stream counter i)
                    # so that every file in the stream is actually read.
                    with open(os.path.join(stream_dir, 'file' + str(j) + '.txt')) as f:
                        self.assertEqual(content, f.read())

    Test().runTest()
62
def fuseMoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move one file out of a stream directory into the mount root.

    Renames "<mounttmp>/<stream>/<filename>" to
    "<mounttmp>/moved-from-<stream>-<filename>" and asserts, via directory
    listings, that the file was present in the stream beforehand, that the
    new name shows up in the mount root, and that the old name is gone
    from the stream afterwards.

    Raises AssertionError if any of those checks fail.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)

            # Precondition: the file is where we expect it.
            before = llfuse.listdir(stream_dir)
            self.assertIn(filename, before)

            moved_name = 'moved-from-'+stream+'-'+filename
            source = os.path.join(mounttmp, stream, filename)
            destination = os.path.join(mounttmp, moved_name)
            os.rename(source, destination)

            # The new name is visible at the top level of the mount ...
            root_listing = llfuse.listdir(os.path.join(mounttmp))
            self.assertIn(moved_name, root_listing)

            # ... and the old name no longer appears in the stream.
            after = llfuse.listdir(stream_dir)
            self.assertNotIn(filename, after)

    Test().runTest()
78
def fuseDeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete one file from a stream directory and verify it is really gone.

    Removes "<mounttmp>/<stream>/<filename>", then asserts that removing
    the same path a second time raises OSError.

    Raises AssertionError if the second removal unexpectedly succeeds;
    an OSError from the first removal propagates unchanged.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            target = os.path.join(stream_dir, filename)

            # The listing itself is not checked here; the call is kept
            # from the original code.
            # NOTE(review): presumably it exercises directory listing
            # before the delete -- confirm whether it is still wanted.
            llfuse.listdir(stream_dir)

            # Delete file
            os.remove(target)

            # Try to delete it again: this must target the same path that
            # was just removed, otherwise the check proves nothing about
            # the delete above.
            with self.assertRaises(OSError):
                os.remove(target)

    Test().runTest()
92
# Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Profiled create / read / move / delete scenario on a writable mount.

    A new empty collection is mounted as a CollectionDirectory. Worker
    calls made through self.pool then create the streams and files, read
    them back, move file0.txt out of each stream and delete file1.txt
    from each stream. After each mutating step the collection record is
    fetched again through the API and its manifest text is checked.
    """

    # No docstring on the test method: unittest would display it in place
    # of the method name.
    @profiled
    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        mount = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            mount.new_collection(collection.api_response(), collection)
        self.assertTrue(mount.writable())

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block

        def fetch_record():
            # Re-read the collection record so the checks see the saved state.
            return self.api.collections().get(uuid=collection.manifest_locator()).execute()

        create_args = (self.mounttmp, streams, files_per_stream, blocks_per_file, bytes_per_block, data)
        self.pool.apply(fuseCreateCollectionWithManyFiles, create_args)

        manifest_text = fetch_record()["manifest_text"]

        for stream_index in range(streams):
            self.assertIn('./stream' + str(stream_index), manifest_text)

        for file_index in range(files_per_stream):
            self.assertIn('file' + str(file_index) + '.txt', manifest_text)

        # Read file contents
        read_args = (self.mounttmp, streams, files_per_stream, data,)
        self.pool.apply(fuseReadContentsFromCollectionWithManyFiles, read_args)

        # Move file0.txt out of the streams into .
        for stream_index in range(streams):
            move_args = (self.mounttmp, 'stream'+str(stream_index), 'file0.txt',)
            self.pool.apply(fuseMoveFileFromCollectionWithManyFiles, move_args)

        manifest_streams = fetch_record()['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        # Entry 0 is checked for the moved files; entries 1..streams are
        # checked as the per-stream lines.
        root_line = manifest_streams[0]
        stream_lines = manifest_streams[1:streams + 1]

        for stream_index in range(streams):
            self.assertIn('moved-from-stream'+str(stream_index)+'-file0.txt', root_line)

        for line in stream_lines:
            self.assertNotIn('file0.txt', line)

        for line in stream_lines:
            for file_index in range(1, files_per_stream):
                self.assertIn('file' + str(file_index) + '.txt', line)

        # Delete 'file1.txt' from all the streams
        for stream_index in range(streams):
            delete_args = (self.mounttmp, 'stream'+str(stream_index), 'file1.txt')
            self.pool.apply(fuseDeleteFileFromCollectionWithManyFiles, delete_args)

        record = fetch_record()
        manifest_streams = record['manifest_text'].split('\n')
        self.assertEqual(4, len(manifest_streams))

        root_line = manifest_streams[0]
        stream_lines = manifest_streams[1:streams + 1]

        for stream_index in range(streams):
            self.assertIn('moved-from-stream'+str(stream_index)+'-file0.txt', root_line)

        self.assertNotIn('file1.txt', record['manifest_text'])

        for line in stream_lines:
            for file_index in range(2, files_per_stream):
                self.assertIn('file' + str(file_index) + '.txt', line)
161
162
class UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
    """Profiled read scenario through a MagicDirectory mount.

    A collection with several streams of small files is written with
    CollectionWriter and registered through the API. A MagicDirectory is
    then mounted, and the collection is looked up by its locator, both at
    the top level of the mount and under 'by_id'. Finally every file is
    read back and compared with the data that was written.

    Despite the class name, this test only creates and reads; it does not
    move or delete anything.
    """

    # No docstring on the test method: unittest would display it in place
    # of the method name.
    @profiled
    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
        # Create collection
        cw = arvados.CollectionWriter()

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block
        stream_names = ['stream' + str(i) for i in range(streams)]
        file_names = ['file' + str(j) + '.txt' for j in range(files_per_stream)]

        for stream_name in stream_names:
            cw.start_new_stream('./' + stream_name)
            for file_name in file_names:
                cw.start_new_file(file_name)
                cw.write(data)

        self.testcollection = cw.finish()
        self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()

        # Mount FuseMagicDir
        self.make_mount(fuse.MagicDirectory)

        mount_ls = llfuse.listdir(self.mounttmp)
        self.assertIn('README', mount_ls)

        # Before any lookup, the listing must not contain collection
        # locators or UUIDs.
        self.assertFalse(any(arvados.util.keep_locator_pattern.match(fn) or
                             arvados.util.uuid_pattern.match(fn)
                             for fn in mount_ls),
                         "new FUSE MagicDirectory lists Collection")

        self.assertDirContents(self.testcollection, stream_names)
        self.assertDirContents(os.path.join('by_id', self.testcollection), stream_names)

        # After the lookups above, the collection must appear in listings.
        mount_ls = llfuse.listdir(self.mounttmp)
        self.assertIn('README', mount_ls)
        self.assertIn(self.testcollection, mount_ls)
        self.assertIn(self.testcollection,
                      llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))

        # Read every file back. The path is joined onto the mount point
        # exactly once, so it is correct whether or not mounttmp is absolute.
        collection_root = os.path.join(self.mounttmp, self.testcollection)
        for stream_name in stream_names:
            for file_name in file_names:
                with open(os.path.join(collection_root, stream_name, file_name)) as f:
                    self.assertEqual(data, f.read())