6219: minor test updates
[arvados.git] / services / fuse / tests / performance / test_collection_performance.py
1 import arvados
2 import arvados.safeapi
3 import arvados_fuse as fuse
4 import glob
5 import json
6 import llfuse
7 import os
8 import shutil
9 import subprocess
10 import sys
11 import tempfile
12 import threading
13 import time
14 import unittest
15 import logging
16 import multiprocessing
17 from .. import run_test_server
18 from ..mount_test_base import MountTestBase
19
20 logger = logging.getLogger('arvados.arv-mount')
21
22 from performance_profiler import profiled
23
@profiled
def fuseCreateCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, blocks_per_file=1, bytes_per_block=1, data='x'):
    """Create stream directories full of small files under a mount point.

    For each of `streams` directories named stream0, stream1, ... under
    `mounttmp`, create file0.txt .. file<files_per_stream-1>.txt, write
    `data` into each, and check the directory listing against the
    expected file names.

    `blocks_per_file` and `bytes_per_block` are accepted but not read
    here; the caller supplies the payload through `data`.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            # file0.txt is always part of the expected listing; the rest
            # follow for indexes 1 .. files_per_stream-1.
            expected_names = sorted(
                ['file0.txt'] +
                ['file' + str(n) + '.txt' for n in range(1, files_per_stream)])

            for stream_index in range(streams):
                stream_dir = os.path.join(mounttmp, "./stream" + str(stream_index))

                # The un-numbered "stream" directory is never created by
                # this function, so opening a file inside it for writing
                # is expected to raise.
                with self.assertRaises(IOError):
                    with open(os.path.join(mounttmp, "./stream", "file0.txt"), "w") as f:
                        f.write(data)

                os.mkdir(stream_dir)

                # Creating the same directory a second time must fail.
                with self.assertRaises(OSError):
                    os.mkdir(stream_dir)

                # Create files
                for file_index in range(files_per_stream):
                    file_path = os.path.join(stream_dir, "file" + str(file_index) + ".txt")
                    with open(file_path, "w") as f:
                        f.write(data)

                listing = llfuse.listdir(stream_dir)
                self.assertEqual(expected_names, sorted(listing))

    Test().runTest()
52
@profiled
def fuseReadContentsFromCollectionWithManyFiles(mounttmp, streams, files_per_stream, content):
    """Read every file in every stream and compare it with `content`.

    Visits the directories stream0 .. stream<streams-1> under `mounttmp`
    and, in each one, opens file0.txt .. file<files_per_stream-1>.txt
    and asserts that the full file contents equal `content`.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            for i in range(0, streams):
                stream_dir = os.path.join(mounttmp, 'stream'+str(i))

                # The listing result is not inspected; the call is kept
                # so the directory is still listed before its files are
                # read, as in the original workload.
                llfuse.listdir(stream_dir)

                for j in range(0, files_per_stream):
                    # Index the file by j (the file counter), not i (the
                    # stream counter), so each file is read exactly once.
                    with open(os.path.join(stream_dir, 'file'+str(j)+'.txt')) as f:
                        self.assertEqual(content, f.read())

    Test().runTest()
64
@profiled
def fuseMoveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Move one file out of a stream directory into the mount root.

    Renames <mounttmp>/<stream>/<filename> to
    <mounttmp>/moved-from-<stream>-<filename>, asserting that the file
    is listed in the stream before the move, listed in the root after
    it, and no longer listed in the stream.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            stream_dir = os.path.join(mounttmp, stream)
            moved_name = 'moved-from-'+stream+'-'+filename

            # Source file must be present before the move.
            self.assertIn(filename, llfuse.listdir(stream_dir))

            os.rename(os.path.join(stream_dir, filename),
                      os.path.join(mounttmp, moved_name))

            # The renamed file shows up at the top level of the mount ...
            root_listing = llfuse.listdir(os.path.join(mounttmp))
            self.assertIn(moved_name, root_listing)

            # ... and is gone from the stream it came from.
            stream_listing = llfuse.listdir(stream_dir)
            self.assertNotIn(filename, stream_listing)

    Test().runTest()
81
@profiled
def fuseDeleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
    """Delete one file from a stream directory and confirm it is gone.

    Removes <mounttmp>/<stream>/<filename>, then asserts that removing
    the same path a second time raises OSError.
    """
    class Test(unittest.TestCase):
        def runTest(self):
            target = os.path.join(mounttmp, stream, filename)

            # The listing result is not inspected; the call is kept so
            # the directory is still listed before the delete, as in the
            # original workload.
            llfuse.listdir(os.path.join(mounttmp, stream))

            # Delete file
            os.remove(target)

            # Try to delete it again. This must target the file that was
            # just removed; an unrelated path would raise OSError whether
            # or not the first delete had worked.
            with self.assertRaises(OSError):
                os.remove(target)

    Test().runTest()
96
97 # Create a collection with two streams, each with 200 files
class CreateCollectionWithManyFilesAndRenameMoveAndDeleteFile(MountTestBase):
    """Create, read, move and delete files in a mounted collection.

    Mounts a newly saved collection, populates it with two streams of
    200 one-byte files through the worker pool, then reads the files,
    moves file0.txt out of each stream and deletes file1.txt from each
    stream, checking the collection's manifest text after each phase.
    """
    def runTest(self):
        collection = arvados.collection.Collection(api_client=self.api)
        collection.save_new()

        mount = self.make_mount(fuse.CollectionDirectory)
        with llfuse.lock:
            mount.new_collection(collection.api_response(), collection)
        self.assertTrue(mount.writable())

        streams = 2
        files_per_stream = 200
        blocks_per_file = 1
        bytes_per_block = 1

        data = 'x' * blocks_per_file * bytes_per_block
        stream_names = ['stream' + str(s) for s in range(streams)]

        def fetch_manifest_text():
            # Re-read the collection record through the API and return
            # its manifest text.
            record = self.api.collections().get(uuid=collection.manifest_locator()).execute()
            return record["manifest_text"]

        self.pool.apply(fuseCreateCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, blocks_per_file, bytes_per_block, data))

        manifest_text = fetch_manifest_text()

        for stream_name in stream_names:
            self.assertIn('./' + stream_name, manifest_text)

        for file_index in range(files_per_stream):
            self.assertIn('file' + str(file_index) + '.txt', manifest_text)

        # Read file contents
        self.pool.apply(fuseReadContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))

        # Move file0.txt out of the streams into .
        for stream_name in stream_names:
            self.pool.apply(fuseMoveFileFromCollectionWithManyFiles, (self.mounttmp, stream_name, 'file0.txt',))

        # Splitting on newlines is expected to give four pieces; the
        # assertions below read piece 0 for the moved files and pieces
        # 1..streams for the per-stream files.
        manifest_streams = fetch_manifest_text().split('\n')
        self.assertEqual(4, len(manifest_streams))

        for stream_name in stream_names:
            self.assertIn('moved-from-' + stream_name + '-file0.txt', manifest_streams[0])

        for stream_line in manifest_streams[1:streams + 1]:
            self.assertNotIn('file0.txt', stream_line)

        for stream_line in manifest_streams[1:streams + 1]:
            for file_index in range(1, files_per_stream):
                self.assertIn('file' + str(file_index) + '.txt', stream_line)

        # Delete 'file1.txt' from all the streams
        for stream_name in stream_names:
            self.pool.apply(fuseDeleteFileFromCollectionWithManyFiles, (self.mounttmp, stream_name, 'file1.txt'))

        manifest_text = fetch_manifest_text()
        manifest_streams = manifest_text.split('\n')
        self.assertEqual(4, len(manifest_streams))

        for stream_name in stream_names:
            self.assertIn('moved-from-' + stream_name + '-file0.txt', manifest_streams[0])

        self.assertNotIn('file1.txt', manifest_text)

        for stream_line in manifest_streams[1:streams + 1]:
            for file_index in range(2, files_per_stream):
                self.assertIn('file' + str(file_index) + '.txt', stream_line)