3 from __future__ import absolute_import
4 from builtins import object
14 from arvados import StreamReader, StreamFileReader
15 from arvados._ranges import Range
17 from . import arvados_testutil as tutil
18 from . import run_test_server
20 class StreamFileReaderTestCase(unittest.TestCase):
21 def make_count_reader(self):
22 stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
23 return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
def test_read_block_crossing_behavior(self):
    # read() calls will be aligned on block boundaries - see #3663.
    # Asking for 10 bytes is therefore expected to return only b'123'.
    reader = self.make_count_reader()
    chunk = reader.read(10)
    self.assertEqual(b'123', chunk)
def test_small_read(self):
    # A two-byte read() is expected to return b'12'.
    reader = self.make_count_reader()
    chunk = reader.read(2)
    self.assertEqual(b'12', chunk)
def test_successive_reads(self):
    # Repeated read(10) calls on one reader are expected to return these
    # chunks in order, ending with an empty bytes object.
    reader = self.make_count_reader()
    expected_chunks = (b'123', b'456', b'789', b'')
    for wanted in expected_chunks:
        got = reader.read(10)
        self.assertEqual(wanted, got)
def test_readfrom_spans_blocks(self):
    # readfrom(5, 12) is expected to return b'6789'.
    reader = self.make_count_reader()
    data = reader.readfrom(5, 12)
    self.assertEqual(b'6789', data)
def test_small_readfrom_spanning_blocks(self):
    # readfrom(1, 4) is expected to return b'2345'.
    reader = self.make_count_reader()
    data = reader.readfrom(1, 4)
    self.assertEqual(b'2345', data)
def test_readall(self):
    # Joining everything readall() yields is expected to give b'123456789'.
    reader = self.make_count_reader()
    whole_file = b''.join(reader.readall())
    self.assertEqual(b'123456789', whole_file)
def test_one_arg_seek(self):
    # Re-run the absolute-seek test with an empty list of extra arguments.
    no_extra_args = []
    self.test_absolute_seek(no_extra_args)
55 def test_absolute_seek(self, args=[os.SEEK_SET]):
56 sfile = self.make_count_reader()
58 self.assertEqual(b'78', sfile.read(2))
60 self.assertEqual(b'56', sfile.read(2))
62 def test_relative_seek(self, args=[os.SEEK_CUR]):
63 sfile = self.make_count_reader()
64 self.assertEqual(b'12', sfile.read(2))
66 self.assertEqual(b'56', sfile.read(2))
def test_end_seek(self):
    # After seek(-6, os.SEEK_END), a two-byte read is expected to return b'45'.
    reader = self.make_count_reader()
    reader.seek(-6, os.SEEK_END)
    chunk = reader.read(2)
    self.assertEqual(b'45', chunk)
def test_seek_min_zero(self):
    # After seek(-2, os.SEEK_SET), tell() is expected to report 0.
    reader = self.make_count_reader()
    reader.seek(-2, os.SEEK_SET)
    position = reader.tell()
    self.assertEqual(0, position)
def test_seek_max_size(self):
    # After seek(2, os.SEEK_END), tell() is expected to report 9.
    reader = self.make_count_reader()
    reader.seek(2, os.SEEK_END)
    position = reader.tell()
    self.assertEqual(9, position)
84 self.assertEqual(9, self.make_count_reader().size())
86 def test_tell_after_block_read(self):
87 sfile = self.make_count_reader()
89 self.assertEqual(3, sfile.tell())
91 def test_tell_after_small_read(self):
92 sfile = self.make_count_reader()
94 self.assertEqual(1, sfile.tell())
96 def test_no_read_after_close(self):
97 sfile = self.make_count_reader()
99 self.assertRaises(ValueError, sfile.read, 2)
def test_context(self):
    # Used as a context manager, the reader should report closed == False
    # inside the with-block and closed == True once the block has exited.
    with self.make_count_reader() as reader:
        self.assertFalse(reader.closed, "reader is closed inside context")
        first_two = reader.read(2)
        self.assertEqual(b'12', first_two)
    self.assertTrue(reader.closed, "reader is open after context")
def make_newlines_reader(self):
    # Build a 'count.txt' reader over two mock blocks (11 and 10 characters
    # long); the split between them falls in the middle of the word "three".
    mock_stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
    segments = [Range(0, 0, 11), Range(11, 11, 10)]
    return StreamFileReader(mock_stream, segments, 'count.txt')
111 def check_lines(self, actual):
112 self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
115 def test_readline(self):
116 reader = self.make_newlines_reader()
119 data = reader.readline()
123 self.check_lines(actual)
def test_readlines(self):
    # readlines() with no argument should produce the lines check_lines expects.
    reader = self.make_newlines_reader()
    lines = reader.readlines()
    self.check_lines(lines)
def test_iteration(self):
    # Iterating over the reader should produce the lines check_lines expects.
    reader = self.make_newlines_reader()
    lines = list(iter(reader))
    self.check_lines(lines)
def test_readline_size(self):
    # Each (size, expected) pair is one readline(size) call, made in order
    # on the same reader.
    reader = self.make_newlines_reader()
    cases = [
        (2, 'on'),
        (4, 'e\n'),
        (6, 'two\n'),
        (8, '\n'),
        (4, 'thre'),
    ]
    for size, wanted in cases:
        self.assertEqual(wanted, reader.readline(size))
def test_readlines_sizehint(self):
    # readlines(8) must start with the first two lines and must not
    # include 'three\n'.
    lines = self.make_newlines_reader().readlines(8)
    first_two = lines[:2]
    self.assertEqual(['one\n', 'two\n'], first_two)
    self.assertNotIn('three\n', lines)
def test_name_attribute(self):
    # Test both .name and .name() (for backward compatibility)
    mock_stream = tutil.MockStreamReader()
    segments = [Range(0, 0, 0)]
    reader = StreamFileReader(mock_stream, segments, 'nametest')
    # Attribute access first, then the call form.
    self.assertEqual('nametest', reader.name)
    self.assertEqual('nametest', reader.name())
def check_decompressed_name(self, filename, expect):
    # Helper: build a zero-length file named `filename` and assert that its
    # decompressed_name() equals `expect`.
    empty_stream = tutil.MockStreamReader('.', '')
    sfile = StreamFileReader(empty_stream, [Range(0, 0, 0)], filename)
    actual = sfile.decompressed_name()
    self.assertEqual(expect, actual)
def test_decompressed_name_uncompressed_file(self):
    # 'test.log' is expected to come back unchanged.
    self.check_decompressed_name(filename='test.log', expect='test.log')
def test_decompressed_name_gzip_file(self):
    # 'test.log.gz' is expected to map to 'test.log'.
    self.check_decompressed_name(filename='test.log.gz', expect='test.log')
def test_decompressed_name_bz2_file(self):
    # 'test.log.bz2' is expected to map to 'test.log'.
    self.check_decompressed_name(filename='test.log.bz2', expect='test.log')
def check_decompression(self, compress_ext, compress_func):
    # Helper: run a fixed payload through `compress_func`, expose the result
    # as a file named 'test.<compress_ext>', and assert that
    # readall_decompressed() yields the original payload.
    plain = b'decompression\ntest\n'
    packed = compress_func(plain)
    mock_stream = tutil.MockStreamReader('.', packed)
    segments = [Range(0, 0, len(packed))]
    filename = 'test.' + compress_ext
    sfile = StreamFileReader(mock_stream, segments, filename)
    recovered = b''.join(sfile.readall_decompressed())
    self.assertEqual(plain, recovered)
def gzip_compress(data):
    # Return `data` gzip-compressed, using an in-memory buffer as the
    # output file.
    buf = io.BytesIO()
    gz = gzip.GzipFile(fileobj=buf, mode='wb')
    try:
        gz.write(data)
    finally:
        # Closing the GzipFile must happen before the buffer is read back.
        gz.close()
    return buf.getvalue()
def test_no_decompression(self):
    # A '.log' file with an identity "compressor": data is stored as-is.
    self.check_decompression(compress_ext='log', compress_func=lambda s: s)
def test_gzip_decompression(self):
    # A '.gz' file compressed with this class's gzip_compress helper.
    self.check_decompression(compress_ext='gz', compress_func=self.gzip_compress)
def test_bz2_decompression(self):
    # A '.bz2' file compressed with bz2.compress.
    self.check_decompression(compress_ext='bz2', compress_func=bz2.compress)
def test_readline_then_readlines(self):
    # readlines() after one readline() should return only the remaining lines.
    reader = self.make_newlines_reader()
    first_line = reader.readline()
    self.assertEqual('one\n', first_line)
    remaining = reader.readlines()
    self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], remaining)
def test_readline_then_readall(self):
    # readall() after one readline() should yield only the remaining bytes.
    reader = self.make_newlines_reader()
    first_line = reader.readline()
    self.assertEqual('one\n', first_line)
    expected_rest = b'two\n' b'\n' b'three\n' b'four\n' b'\n'
    actual_rest = b''.join(reader.readall())
    self.assertEqual(expected_rest, actual_rest)
203 class StreamRetryTestMixin(object):
# Classes that use this mixin must define reader_for(coll_name, **kwargs)
# and read_for_test(reader, size, **kwargs).
206 API_COLLECTIONS = run_test_server.fixture('collections')
208 def keep_client(self):
209 return arvados.KeepClient(proxy='http://[%s]:1' % (tutil.TEST_HOST,),
def manifest_for(self, coll_name):
    # Look up `coll_name` in API_COLLECTIONS and return that entry's
    # 'manifest_text' value.
    collection = self.API_COLLECTIONS[coll_name]
    return collection['manifest_text']
def test_success_without_retries(self):
    # Under mock_keep_responses('bar', 200), a 3-byte read of 'bar_file'
    # is expected to return b'bar'.
    with tutil.mock_keep_responses('bar', 200):
        bar_reader = self.reader_for('bar_file')
        data = self.read_for_test(bar_reader, 3)
        self.assertEqual(b'bar', data)
def test_read_no_default_retry(self):
    # Under mock_keep_responses('', 500), a read with no num_retries given
    # anywhere is expected to raise KeepReadError.
    with tutil.mock_keep_responses('', 500):
        agreement_reader = self.reader_for('user_agreement')
        expected_error = arvados.errors.KeepReadError
        with self.assertRaises(expected_error):
            self.read_for_test(agreement_reader, 10)
def test_read_with_instance_retries(self):
    # num_retries=3 is passed when the reader is created; under
    # mock_keep_responses('foo', 500, 200) the read is expected to
    # return b'foo'.
    with tutil.mock_keep_responses('foo', 500, 200):
        foo_reader = self.reader_for('foo_file', num_retries=3)
        data = self.read_for_test(foo_reader, 3)
        self.assertEqual(b'foo', data)
def test_read_with_method_retries(self):
    # num_retries=3 is passed to the read call rather than to the reader;
    # under mock_keep_responses('foo', 500, 200) the read is expected to
    # return b'foo'.
    with tutil.mock_keep_responses('foo', 500, 200):
        foo_reader = self.reader_for('foo_file')
        data = self.read_for_test(foo_reader, 3, num_retries=3)
        self.assertEqual(b'foo', data)
def test_read_instance_retries_exhausted(self):
    # The mocked sequence has four 500s before the 200; with num_retries=3
    # set on the reader, the read is expected to raise KeepReadError.
    with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
        bar_reader = self.reader_for('bar_file', num_retries=3)
        expected_error = arvados.errors.KeepReadError
        with self.assertRaises(expected_error):
            self.read_for_test(bar_reader, 3)
def test_read_method_retries_exhausted(self):
    # The mocked sequence has four 500s before the 200; with num_retries=3
    # passed to the read call, the read is expected to raise KeepReadError.
    with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
        bar_reader = self.reader_for('bar_file')
        expected_error = arvados.errors.KeepReadError
        with self.assertRaises(expected_error):
            self.read_for_test(bar_reader, 3, num_retries=3)
def test_method_retries_take_precedence(self):
    # The reader is created with num_retries=10 but the read call passes
    # num_retries=1; with three 500s mocked before the 200, the read is
    # expected to raise KeepReadError.
    with tutil.mock_keep_responses('', 500, 500, 500, 200):
        agreement_reader = self.reader_for('user_agreement', num_retries=10)
        expected_error = arvados.errors.KeepReadError
        with self.assertRaises(expected_error):
            self.read_for_test(agreement_reader, 10, num_retries=1)
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Supplies the reader_for/read_for_test hooks for StreamRetryTestMixin
    # using StreamReader.readfrom().

    def reader_for(self, coll_name, **kwargs):
        # Build a StreamReader from the whitespace-split manifest text.
        tokens = self.manifest_for(coll_name).split()
        client = self.keep_client()
        return StreamReader(tokens, client, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        # Read `byte_count` bytes via readfrom(), passing 0 as the first argument.
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # A StreamReader built from one manifest line, with no Keep client,
        # should return that same line (newline-terminated) from manifest_text().
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            reader = StreamReader(stream_line.split())
            expected = stream_line + '\n'
            self.assertEqual(expected, reader.manifest_text())
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Supplies the reader_for/read_for_test hooks for StreamRetryTestMixin
    # using the first file of a StreamReader and its read() method.

    def reader_for(self, coll_name, **kwargs):
        # Return the first entry of all_files() for the collection's stream.
        tokens = self.manifest_for(coll_name).split()
        client = self.keep_client()
        stream = StreamReader(tokens, client, **kwargs)
        files = stream.all_files()
        return files[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        # Read `byte_count` bytes with read().
        data = reader.read(byte_count, **kwargs)
        return data
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    # Same as StreamFileReadTestCase, but reads through readfrom().

    def read_for_test(self, reader, byte_count, **kwargs):
        # Read `byte_count` bytes via readfrom(), passing 0 as the first argument.
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    # Same as StreamFileReadTestCase, but reads through readall().

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: everything readall() yields is joined.
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    # Same as StreamFileReadTestCase, but reads through readall_decompressed().

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: everything readall_decompressed() yields
        # is joined.
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    # Same as StreamFileReadTestCase, but reads through readlines().

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: all lines are joined and then encoded to
        # bytes so the mixin's bytes comparisons apply.
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        return text.encode()
306 if __name__ == '__main__':