1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
12 from unittest import mock
15 from arvados import StreamReader, StreamFileReader
16 from arvados._ranges import Range
18 from . import arvados_testutil as tutil
19 from . import run_test_server
# Unit tests for StreamFileReader: reading, seeking, tell/size, closing,
# context-manager use, line-oriented access, naming and decompression, all
# driven by tutil.MockStreamReader fixtures.
# NOTE(review): in this copy every line carries an embedded source line
# number, indentation is absent, and the embedded numbering has gaps, so some
# statements are not visible. Places where a statement appears to be missing
# are flagged below -- confirm each against the upstream file.
21 class StreamFileReaderTestCase(unittest.TestCase):
# Fixture: a StreamFileReader over a three-block mock stream. The tests below
# expect its content to read back as b'123456789' (see test_readall).
# NOTE(review): the StreamFileReader(...) call is left unclosed here; its
# remaining argument(s) on embedded line 25 are not visible.
22 def make_count_reader(self):
23 stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
24 return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
# A 10-byte read from the start is expected to return only the first 3 bytes.
27 def test_read_block_crossing_behavior(self):
28 # read() calls will be aligned on block boundaries - see #3663.
29 sfile = self.make_count_reader()
30 self.assertEqual(b'123', sfile.read(10))
# A read smaller than the first segment returns exactly the requested bytes.
32 def test_small_read(self):
33 sfile = self.make_count_reader()
34 self.assertEqual(b'12', sfile.read(2))
# Repeated 10-byte reads are expected to yield 3 bytes at a time, then b''
# once the content is exhausted.
36 def test_successive_reads(self):
37 sfile = self.make_count_reader()
38 for expect in [b'123', b'456', b'789', b'']:
39 self.assertEqual(expect, sfile.read(10))
# readfrom(5, 12) is expected to return everything from offset 5 to the end,
# even though that crosses a segment boundary and asks for more than remains.
41 def test_readfrom_spans_blocks(self):
42 sfile = self.make_count_reader()
43 self.assertEqual(b'6789', sfile.readfrom(5, 12))
# readfrom(1, 4) is expected to return 4 bytes that straddle two segments.
45 def test_small_readfrom_spanning_blocks(self):
46 sfile = self.make_count_reader()
47 self.assertEqual(b'2345', sfile.readfrom(1, 4))
# Joining the pieces produced by readall() gives the complete content.
49 def test_readall(self):
50 sfile = self.make_count_reader()
51 self.assertEqual(b'123456789', b''.join(sfile.readall()))
# Re-runs test_absolute_seek with an empty args list.
53 def test_one_arg_seek(self):
54 self.test_absolute_seek([])
# NOTE(review): embedded lines 58 and 60 are not visible. The two reads below
# expect data from offsets 6 and 4, so the missing statements presumably
# position the reader (likely seek calls that use `args`) -- confirm.
56 def test_absolute_seek(self, args=[os.SEEK_SET]):
57 sfile = self.make_count_reader()
59 self.assertEqual(b'78', sfile.read(2))
61 self.assertEqual(b'56', sfile.read(2))
# NOTE(review): embedded line 66 is not visible. After reading 2 bytes the next
# read expects data from offset 4, so a relative seek using `args` is
# presumably missing between the two assertions -- confirm.
63 def test_relative_seek(self, args=[os.SEEK_CUR]):
64 sfile = self.make_count_reader()
65 self.assertEqual(b'12', sfile.read(2))
67 self.assertEqual(b'56', sfile.read(2))
# Seeking 6 bytes back from the end and reading 2 bytes is expected to give b'45'.
69 def test_end_seek(self):
70 sfile = self.make_count_reader()
71 sfile.seek(-6, os.SEEK_END)
72 self.assertEqual(b'45', sfile.read(2))
# Seeking to a negative absolute offset must raise IOError and leave the
# position reported by tell() unchanged at 0.
74 def test_seek_min_zero(self):
75 sfile = self.make_count_reader()
76 self.assertEqual(0, sfile.tell())
77 with self.assertRaises(IOError):
78 sfile.seek(-2, os.SEEK_SET)
79 self.assertEqual(0, sfile.tell())
# Seeking 2 bytes past the end is allowed; tell() then reports 11.
81 def test_seek_max_size(self):
82 sfile = self.make_count_reader()
83 sfile.seek(2, os.SEEK_END)
84 # POSIX permits seeking past end of file.
85 self.assertEqual(11, sfile.tell())
# NOTE(review): embedded lines 86-87 are not visible. The size() assertion
# below presumably belongs to a separate test method whose `def` line is
# missing from this copy -- confirm.
88 self.assertEqual(9, self.make_count_reader().size())
# NOTE(review): embedded line 92 is not visible; a read call presumably
# precedes the tell() assertion -- confirm.
90 def test_tell_after_block_read(self):
91 sfile = self.make_count_reader()
93 self.assertEqual(3, sfile.tell())
# NOTE(review): embedded line 97 is not visible; a one-byte read presumably
# precedes the tell() assertion -- confirm.
95 def test_tell_after_small_read(self):
96 sfile = self.make_count_reader()
98 self.assertEqual(1, sfile.tell())
# Reading is expected to raise ValueError.
# NOTE(review): embedded line 102 is not visible; going by the test name it
# presumably closes the reader first -- confirm.
100 def test_no_read_after_close(self):
101 sfile = self.make_count_reader()
103 self.assertRaises(ValueError, sfile.read, 2)
# The reader works as a context manager: `closed` is false inside the `with`
# body and true after leaving it.
105 def test_context(self):
106 with self.make_count_reader() as sfile:
107 self.assertFalse(sfile.closed, "reader is closed inside context")
108 self.assertEqual(b'12', sfile.read(2))
109 self.assertTrue(sfile.closed, "reader is open after context")
# Fixture: a StreamFileReader named 'count.txt' over a two-block mock stream of
# newline-terminated text; the word "three" is split across the two blocks.
111 def make_newlines_reader(self):
112 stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
113 return StreamFileReader(stream, [Range(0, 0, 11), Range(11, 11, 10)], 'count.txt')
# Helper: asserts against the six expected lines of the newlines fixture.
# NOTE(review): the assertEqual(...) call is left unclosed here; its second
# argument on embedded line 117 is not visible (presumably `actual`).
115 def check_lines(self, actual):
116 self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
# Collects lines via readline() and validates them with check_lines().
# NOTE(review): embedded lines 121-122 and 124-126 are not visible, so the
# loop that builds `actual` cannot be seen here -- confirm.
119 def test_readline(self):
120 reader = self.make_newlines_reader()
123 data = reader.readline()
127 self.check_lines(actual)
# readlines() with no argument is expected to return every line of the fixture.
129 def test_readlines(self):
130 self.check_lines(self.make_newlines_reader().readlines())
# Iterating over the reader is expected to yield the same lines as readlines().
132 def test_iteration(self):
133 self.check_lines(list(iter(self.make_newlines_reader())))
# Per the expected values below, readline(size) stops at the newline or after
# `size` characters, whichever comes first.
135 def test_readline_size(self):
136 reader = self.make_newlines_reader()
137 self.assertEqual('on', reader.readline(2))
138 self.assertEqual('e\n', reader.readline(4))
139 self.assertEqual('two\n', reader.readline(6))
140 self.assertEqual('\n', reader.readline(8))
141 self.assertEqual('thre', reader.readline(4))
# readlines(8) is expected to start with the first two lines and to stop
# before producing 'three\n'.
143 def test_readlines_sizehint(self):
144 result = self.make_newlines_reader().readlines(8)
145 self.assertEqual(['one\n', 'two\n'], result[:2])
146 self.assertNotIn('three\n', result)
# `name` must compare equal to the filename and also return it when called.
148 def test_name_attribute(self):
149 # Test both .name and .name() (for backward compatibility)
150 stream = tutil.MockStreamReader()
151 sfile = StreamFileReader(stream, [Range(0, 0, 0)], 'nametest')
152 self.assertEqual('nametest', sfile.name)
153 self.assertEqual('nametest', sfile.name())
# Helper: builds an empty reader called `filename` and asserts that its
# decompressed_name() equals `expect`.
155 def check_decompressed_name(self, filename, expect):
156 stream = tutil.MockStreamReader('.', '')
157 reader = StreamFileReader(stream, [Range(0, 0, 0)], filename)
158 self.assertEqual(expect, reader.decompressed_name())
# A name without a compression suffix is expected to come back unchanged.
160 def test_decompressed_name_uncompressed_file(self):
161 self.check_decompressed_name('test.log', 'test.log')
# A trailing '.gz' is expected to be dropped.
163 def test_decompressed_name_gzip_file(self):
164 self.check_decompressed_name('test.log.gz', 'test.log')
# A trailing '.bz2' is expected to be dropped.
166 def test_decompressed_name_bz2_file(self):
167 self.check_decompression_placeholder_do_not_use = None
# Shared retry-behaviour tests. Concrete test classes mix this in and supply
# reader_for() and read_for_test(); every test wraps its read in
# tutil.mock_keep_responses(...).
# NOTE(review): the arguments to mock_keep_responses are presumably a response
# body followed by successive HTTP status codes -- confirm in arvados_testutil.
# NOTE(review): the embedded numbering skips two lines before each test method
# (e.g. 218-219); if one of each pair is a decorator it is not visible in this
# copy -- confirm against the upstream file.
207 class StreamRetryTestMixin(object):
208 # Define reader_for(coll_name, **kwargs)
209 # and read_for_test(reader, size, **kwargs).
# Collection fixtures loaded once at class-definition time.
210 API_COLLECTIONS = run_test_server.fixture('collections')
# Builds a KeepClient that uses a proxy URL on port 1 of tutil.TEST_HOST.
# NOTE(review): the KeepClient(...) call is left unclosed here; its remaining
# argument(s) on embedded line 214 are not visible.
212 def keep_client(self):
213 return arvados.KeepClient(proxy='http://[%s]:1' % (tutil.TEST_HOST,),
# Returns the manifest text of the named collection fixture.
216 def manifest_for(self, coll_name):
217 return self.API_COLLECTIONS[coll_name]['manifest_text']
# A single 200 response: reading 3 bytes is expected to return b'bar'.
220 def test_success_without_retries(self):
221 with tutil.mock_keep_responses('bar', 200):
222 reader = self.reader_for('bar_file')
223 self.assertEqual(b'bar', self.read_for_test(reader, 3))
# A 500 followed by a 200, with num_retries=3 set on the reader: the read is
# expected to succeed.
226 def test_read_with_instance_retries(self):
227 with tutil.mock_keep_responses('foo', 500, 200):
228 reader = self.reader_for('foo_file', num_retries=3)
229 self.assertEqual(b'foo', self.read_for_test(reader, 3))
# Same responses, but num_retries=3 is passed to the read call instead of the
# reader.
232 def test_read_with_method_retries(self):
233 with tutil.mock_keep_responses('foo', 500, 200):
234 reader = self.reader_for('foo_file')
235 self.assertEqual(b'foo',
236 self.read_for_test(reader, 3, num_retries=3))
# Four 500 responses before the 200, with num_retries=3 on the reader: the
# read is expected to raise KeepReadError.
239 def test_read_instance_retries_exhausted(self):
240 with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
241 reader = self.reader_for('bar_file', num_retries=3)
242 with self.assertRaises(arvados.errors.KeepReadError):
243 self.read_for_test(reader, 3)
# Same as above, with num_retries=3 passed to the read call instead.
246 def test_read_method_retries_exhausted(self):
247 with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
248 reader = self.reader_for('bar_file')
249 with self.assertRaises(arvados.errors.KeepReadError):
250 self.read_for_test(reader, 3, num_retries=3)
# The reader allows 10 retries but the read call passes num_retries=1. With
# three 500 responses queued, the read is expected to raise KeepReadError, so
# the per-call value wins.
253 def test_method_retries_take_precedence(self):
254 with tutil.mock_keep_responses('', 500, 500, 500, 200):
255 reader = self.reader_for('user_agreement', num_retries=10)
256 with self.assertRaises(arvados.errors.KeepReadError):
257 self.read_for_test(reader, 10, num_retries=1)
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the StreamRetryTestMixin tests through StreamReader.readfrom().

    Also checks that a StreamReader built without a Keep client reproduces
    the manifest line it was constructed from.
    """

    def reader_for(self, coll_name, **kwargs):
        """Return a StreamReader over the named fixture's manifest tokens.

        Extra keyword arguments are forwarded to the StreamReader constructor.
        """
        tokens = self.manifest_for(coll_name).split()
        client = self.keep_client()
        return StreamReader(tokens, client, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes from offset 0 of reader via readfrom()."""
        data = reader.readfrom(0, byte_count, **kwargs)
        return data

    def test_manifest_text_without_keep_client(self):
        """Each manifest line must round-trip through StreamReader unchanged."""
        manifest = self.manifest_for('multilevel_collection_1')
        # Drop the trailing newline first so split() yields no empty last entry.
        for stream_line in manifest.rstrip('\n').split('\n'):
            rebuilt = StreamReader(stream_line.split()).manifest_text()
            self.assertEqual(stream_line + '\n', rebuilt)
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the StreamRetryTestMixin tests through a stream file's read()."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file of a StreamReader over the named fixture.

        Extra keyword arguments are forwarded to the StreamReader constructor.
        """
        stream = StreamReader(self.manifest_for(coll_name).split(),
                              self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to byte_count bytes from reader via read()."""
        data = reader.read(byte_count, **kwargs)
        return data
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Variant of StreamFileReadTestCase that reads via readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes starting at offset 0 via readfrom()."""
        data = reader.readfrom(0, byte_count, **kwargs)
        return data
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Variant of StreamFileReadTestCase that reads via readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return the joined output of readall(); byte_count is not used."""
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Variant of StreamFileReadTestCase that reads via readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return the joined output of readall_decompressed(); byte_count is not used."""
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Variant of StreamFileReadTestCase that reads via readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Return the lines from readlines() joined and encoded to bytes.

        byte_count is not used.
        """
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        return text.encode()
303 if __name__ == '__main__':