12 from arvados import StreamReader, StreamFileReader
13 from arvados._ranges import Range
15 import arvados_testutil as tutil
16 import run_test_server
18 class StreamFileReaderTestCase(unittest.TestCase):
19 def make_count_reader(self):
20 stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
21 return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
def test_read_block_crossing_behavior(self):
    # read() calls will be aligned on block boundaries - see #3663.
    # So a 10-byte request against the count reader returns only '123'.
    reader = self.make_count_reader()
    chunk = reader.read(10)
    self.assertEqual('123', chunk)
def test_small_read(self):
    # A two-byte read returns exactly the first two bytes of the file.
    reader = self.make_count_reader()
    chunk = reader.read(2)
    self.assertEqual('12', chunk)
def test_successive_reads(self):
    # Repeated oversized reads return one chunk each, then an empty
    # string once the data has been consumed.
    reader = self.make_count_reader()
    expected_chunks = ['123', '456', '789', '']
    for expected in expected_chunks:
        self.assertEqual(expected, reader.read(10))
def test_readfrom_spans_blocks(self):
    # readfrom(5, 12) asks for more than remains after offset 5 and
    # gets back everything from there to the end: '6789'.
    reader = self.make_count_reader()
    data = reader.readfrom(5, 12)
    self.assertEqual('6789', data)
def test_small_readfrom_spanning_blocks(self):
    # A four-byte readfrom() starting at offset 1 returns '2345'.
    reader = self.make_count_reader()
    data = reader.readfrom(1, 4)
    self.assertEqual('2345', data)
def test_readall(self):
    # Joining every piece produced by readall() gives the whole file.
    reader = self.make_count_reader()
    contents = ''.join(reader.readall())
    self.assertEqual('123456789', contents)
50 def test_one_arg_seek(self):
51 self.test_absolute_seek([])
53 def test_absolute_seek(self, args=[os.SEEK_SET]):
54 sfile = self.make_count_reader()
56 self.assertEqual('78', sfile.read(2))
58 self.assertEqual('56', sfile.read(2))
60 def test_relative_seek(self, args=[os.SEEK_CUR]):
61 sfile = self.make_count_reader()
62 self.assertEqual('12', sfile.read(2))
64 self.assertEqual('56', sfile.read(2))
def test_end_seek(self):
    # Seeking 6 bytes back from the end positions the reader at '4'.
    reader = self.make_count_reader()
    reader.seek(-6, os.SEEK_END)
    data = reader.read(2)
    self.assertEqual('45', data)
def test_seek_min_zero(self):
    # Seeking to a negative absolute offset must raise IOError and
    # leave the position where it was (0).
    reader = self.make_count_reader()
    position_before = reader.tell()
    self.assertEqual(0, position_before)
    with self.assertRaises(IOError):
        reader.seek(-2, os.SEEK_SET)
    position_after = reader.tell()
    self.assertEqual(0, position_after)
def test_seek_max_size(self):
    reader = self.make_count_reader()
    reader.seek(2, os.SEEK_END)
    # POSIX permits seeking past end of file.
    position = reader.tell()
    self.assertEqual(11, position)
85 self.assertEqual(9, self.make_count_reader().size())
87 def test_tell_after_block_read(self):
88 sfile = self.make_count_reader()
90 self.assertEqual(3, sfile.tell())
92 def test_tell_after_small_read(self):
93 sfile = self.make_count_reader()
95 self.assertEqual(1, sfile.tell())
97 def test_no_read_after_close(self):
98 sfile = self.make_count_reader()
100 self.assertRaises(ValueError, sfile.read, 2)
def test_context(self):
    # The reader works as a context manager: it is open and readable
    # inside the block and reports closed once the block exits.
    with self.make_count_reader() as reader:
        self.assertFalse(reader.closed, "reader is closed inside context")
        data = reader.read(2)
        self.assertEqual('12', data)
    self.assertTrue(reader.closed, "reader is open after context")
def make_newlines_reader(self):
    """Return a StreamFileReader named 'count.txt' over two mock blocks.

    The mock blocks hold newline-delimited text, and the word 'three'
    is split between the first block and the second.
    """
    blocks = ('one\ntwo\n\nth', 'ree\nfour\n\n')
    stream = tutil.MockStreamReader('.', *blocks)
    segments = [Range(0, 0, 11), Range(11, 11, 10)]
    return StreamFileReader(stream, segments, 'count.txt')
112 def check_lines(self, actual):
113 self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
116 def test_readline(self):
117 reader = self.make_newlines_reader()
120 data = reader.readline()
124 self.check_lines(actual)
def test_readlines(self):
    # readlines() with no arguments is checked against check_lines().
    reader = self.make_newlines_reader()
    lines = reader.readlines()
    self.check_lines(lines)
def test_iteration(self):
    # Iterating the reader directly is checked against check_lines().
    reader = self.make_newlines_reader()
    lines = list(iter(reader))
    self.check_lines(lines)
def test_readline_size(self):
    # readline(size) returns at most `size` characters and never reads
    # past a newline. Each pair is (expected result, size argument),
    # applied in order to the same reader.
    reader = self.make_newlines_reader()
    steps = [
        ('on', 2),
        ('e\n', 4),
        ('two\n', 6),
        ('\n', 8),
        ('thre', 4),
    ]
    for expected, size in steps:
        self.assertEqual(expected, reader.readline(size))
def test_readlines_sizehint(self):
    # With a size hint of 8, readlines() still returns the first two
    # lines but must not have reached 'three\n'.
    reader = self.make_newlines_reader()
    lines = reader.readlines(8)
    first_two = lines[:2]
    self.assertEqual(['one\n', 'two\n'], first_two)
    self.assertNotIn('three\n', lines)
def test_name_attribute(self):
    # Test both .name and .name() (for backward compatibility)
    expected_name = 'nametest'
    stream = tutil.MockStreamReader()
    segments = [Range(0, 0, 0)]
    reader = StreamFileReader(stream, segments, 'nametest')
    self.assertEqual(expected_name, reader.name)
    self.assertEqual(expected_name, reader.name())
def check_decompressed_name(self, filename, expect):
    """Assert that a reader named *filename* reports *expect*.

    Builds a StreamFileReader over a mock stream and compares its
    decompressed_name() with *expect*.
    """
    empty_stream = tutil.MockStreamReader('.', '')
    segments = [Range(0, 0, 0)]
    reader = StreamFileReader(empty_stream, segments, filename)
    actual = reader.decompressed_name()
    self.assertEqual(expect, actual)
157 def test_decompressed_name_uncompressed_file(self):
158 self.check_decompressed_name('test.log', 'test.log')
160 def test_decompressed_name_gzip_file(self):
161 self.check_decompressed_name('test.log.gz', 'test.log')
163 def test_decompressed_name_bz2_file(self):
164 self.check_decompressed_name('test.log.bz2', 'test.log')
def check_decompression(self, compress_ext, compress_func):
    """Round-trip a fixed text through *compress_func* and the reader.

    The text is compressed with *compress_func*, served from a mock
    stream as a file named 'test.<compress_ext>', and the joined output
    of readall_decompressed() must equal the original text.
    """
    plain_text = 'decompression\ntest\n'
    payload = compress_func(plain_text)
    stream = tutil.MockStreamReader('.', payload)
    segments = [Range(0, 0, len(payload))]
    filename = 'test.' + compress_ext
    reader = StreamFileReader(stream, segments, filename)
    decompressed = ''.join(reader.readall_decompressed())
    self.assertEqual(plain_text, decompressed)
def gzip_compress(data):
    """Return *data* gzip-compressed, built entirely in memory."""
    buf = io.BytesIO()
    gz = gzip.GzipFile(fileobj=buf, mode='wb')
    try:
        gz.write(data)
    finally:
        # Closing writes the gzip trailer into buf; the caller-supplied
        # fileobj itself stays open, so getvalue() below is still valid.
        gz.close()
    return buf.getvalue()
181 def test_no_decompression(self):
182 self.check_decompression('log', lambda s: s)
184 def test_gzip_decompression(self):
185 self.check_decompression('gz', self.gzip_compress)
187 def test_bz2_decompression(self):
188 self.check_decompression('bz2', bz2.compress)
def test_readline_then_readlines(self):
    # readlines() after one readline() returns only the remaining lines.
    reader = self.make_newlines_reader()
    first_line = reader.readline()
    self.assertEqual('one\n', first_line)
    remaining = reader.readlines()
    self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], remaining)
def test_readline_then_readall(self):
    # readall() after one readline() yields only the remaining data.
    reader = self.make_newlines_reader()
    first_line = reader.readline()
    self.assertEqual('one\n', first_line)
    expected_rest = ''.join(['two\n', '\n', 'three\n', 'four\n', '\n'])
    actual_rest = ''.join(reader.readall())
    self.assertEqual(expected_rest, actual_rest)
204 class StreamRetryTestMixin(object):
205 # Define reader_for(coll_name, **kwargs)
206 # and read_for_test(reader, size, **kwargs).
207 API_COLLECTIONS = run_test_server.fixture('collections')
209 def keep_client(self):
210 return arvados.KeepClient(proxy='http://[%s]:1' % (tutil.TEST_HOST,),
def manifest_for(self, coll_name):
    """Return the 'manifest_text' entry of the fixture named *coll_name*."""
    fixture = self.API_COLLECTIONS[coll_name]
    return fixture['manifest_text']
def test_success_without_retries(self):
    # A single 200 response is enough to read the data.
    with tutil.mock_keep_responses('bar', 200):
        reader = self.reader_for('bar_file')
        data = self.read_for_test(reader, 3)
        self.assertEqual('bar', data)
def test_read_no_default_retry(self):
    # With no num_retries given anywhere, a single 500 response
    # surfaces as KeepReadError.
    expected_error = arvados.errors.KeepReadError
    with tutil.mock_keep_responses('', 500):
        reader = self.reader_for('user_agreement')
        with self.assertRaises(expected_error):
            self.read_for_test(reader, 10)
def test_read_with_instance_retries(self):
    # num_retries passed when building the reader lets the read get
    # past the initial 500 to the following 200.
    with tutil.mock_keep_responses('foo', 500, 200):
        reader = self.reader_for('foo_file', num_retries=3)
        data = self.read_for_test(reader, 3)
        self.assertEqual('foo', data)
def test_read_with_method_retries(self):
    # num_retries passed to the read call itself lets the read get
    # past the initial 500 to the following 200.
    with tutil.mock_keep_responses('foo', 500, 200):
        reader = self.reader_for('foo_file')
        data = self.read_for_test(reader, 3, num_retries=3)
        self.assertEqual('foo', data)
def test_read_instance_retries_exhausted(self):
    # Four 500 responses precede the 200; with num_retries=3 set on
    # the reader, the read must fail with KeepReadError.
    expected_error = arvados.errors.KeepReadError
    with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
        reader = self.reader_for('bar_file', num_retries=3)
        with self.assertRaises(expected_error):
            self.read_for_test(reader, 3)
def test_read_method_retries_exhausted(self):
    # Four 500 responses precede the 200; with num_retries=3 passed to
    # the read call, the read must fail with KeepReadError.
    expected_error = arvados.errors.KeepReadError
    with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
        reader = self.reader_for('bar_file')
        with self.assertRaises(expected_error):
            self.read_for_test(reader, 3, num_retries=3)
def test_method_retries_take_precedence(self):
    # The reader is built with num_retries=10 but the read call passes
    # num_retries=1; the read must still fail with KeepReadError even
    # though a 200 follows the three 500 responses.
    expected_error = arvados.errors.KeepReadError
    with tutil.mock_keep_responses('', 500, 500, 500, 200):
        reader = self.reader_for('user_agreement', num_retries=10)
        with self.assertRaises(expected_error):
            self.read_for_test(reader, 10, num_retries=1)
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against StreamReader.readfrom()."""

    def reader_for(self, coll_name, **kwargs):
        """Build a StreamReader from the fixture manifest of *coll_name*."""
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read *byte_count* bytes from offset 0 of *reader*."""
        return reader.readfrom(0, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # A StreamReader built without a Keep client must reproduce each
        # manifest line, plus its trailing newline, from manifest_text().
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            reader = StreamReader(stream_line.split())
            self.assertEqual(stream_line + '\n', reader.manifest_text())
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against read() on a stream's first file."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file of a StreamReader built for *coll_name*."""
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to *byte_count* bytes with reader.read()."""
        return reader.read(byte_count, **kwargs)
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Repeat the file-reader retry tests using readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read *byte_count* bytes starting at offset 0 via readfrom()."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Repeat the file-reader retry tests using readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return everything readall() yields, joined; *byte_count* is unused."""
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Repeat the file-reader retry tests using readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return the joined readall_decompressed() output; *byte_count* is unused."""
        pieces = reader.readall_decompressed(**kwargs)
        return ''.join(pieces)
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Repeat the file-reader retry tests using readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return all lines from readlines(), joined; *byte_count* is unused."""
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
307 if __name__ == '__main__':