8784: Fix test for latest firefox.
[arvados.git] / sdk / python / tests / test_stream.py
1 from __future__ import absolute_import
2 from builtins import object
3 import bz2
4 import gzip
5 import io
6 import mock
7 import os
8 import unittest
9 import hashlib
10
11 import arvados
12 from arvados import StreamReader, StreamFileReader
13 from arvados._ranges import Range
14
15 from . import arvados_testutil as tutil
16 from . import run_test_server
17
class StreamFileReaderTestCase(unittest.TestCase):
    """Tests for StreamFileReader, run against tutil.MockStreamReader data.

    Two fixtures are used: make_count_reader() for byte-oriented reads,
    seeking and tell/size, and make_newlines_reader() for line-oriented
    reads.
    """

    def make_count_reader(self):
        """Return a StreamFileReader named 'count.txt' over three mock blocks.

        Each segment is three bytes long; the tests in this class expect
        the resulting file to read back as b'123456789'.
        """
        stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
        segments = [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]
        return StreamFileReader(stream, segments, 'count.txt')

    def test_read_block_crossing_behavior(self):
        # read() calls will be aligned on block boundaries - see #3663.
        sfile = self.make_count_reader()
        self.assertEqual(b'123', sfile.read(10))

    def test_small_read(self):
        # A read shorter than the first segment returns exactly that much.
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))

    def test_successive_reads(self):
        # Each oversized read yields one segment; the last one yields b''.
        sfile = self.make_count_reader()
        for expect in [b'123', b'456', b'789', b'']:
            self.assertEqual(expect, sfile.read(10))

    def test_readfrom_spans_blocks(self):
        # Unlike read(), readfrom() is expected to cross segment boundaries.
        sfile = self.make_count_reader()
        self.assertEqual(b'6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'2345', sfile.readfrom(1, 4))

    def test_readall(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'123456789', b''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # seek(pos) with no whence argument must act like an absolute seek.
        self.test_absolute_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        # `args` holds the extra positional arguments handed to seek(); the
        # default is an immutable tuple so it cannot be shared-and-mutated
        # between calls.  The test runner invokes this with the default.
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual(b'78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        # Same calling convention as test_absolute_seek: `args` is the
        # (immutable) tuple of extra positional arguments for seek().
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_end_seek(self):
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual(b'45', sfile.read(2))

    def test_seek_min_zero(self):
        # Seeking before the start must raise and leave the position alone.
        sfile = self.make_count_reader()
        self.assertEqual(0, sfile.tell())
        with self.assertRaises(IOError):
            sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        # POSIX permits seeking past end of file.
        self.assertEqual(11, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_block_read(self):
        # read(5) stops at the 3-byte segment boundary, so tell() is 3.
        sfile = self.make_count_reader()
        sfile.read(5)
        self.assertEqual(3, sfile.tell())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader is usable inside a `with` block and closed afterwards.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual(b'12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def make_newlines_reader(self):
        """Return a StreamFileReader over two mock blocks of newline text.

        The word 'three' is split across the two blocks, so line-oriented
        reads have to join data from both.
        """
        stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
        segments = [Range(0, 0, 11), Range(11, 11, 10)]
        return StreamFileReader(stream, segments, 'count.txt')

    def check_lines(self, actual):
        """Assert that `actual` is the full list of lines in the newlines fixture."""
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        # Collect lines until readline() returns something falsy (end of file).
        reader = self.make_newlines_reader()
        actual = []
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # The size argument caps the result, but a newline still ends it early.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # With a size hint of 8 the result starts with the first two lines
        # and must not reach as far as 'three\n'.
        result = self.make_newlines_reader().readlines(8)
        self.assertEqual(['one\n', 'two\n'], result[:2])
        self.assertNotIn('three\n', result)

    def test_name_attribute(self):
        # Test both .name and .name() (for backward compatibility)
        stream = tutil.MockStreamReader()
        sfile = StreamFileReader(stream, [Range(0, 0, 0)], 'nametest')
        self.assertEqual('nametest', sfile.name)
        self.assertEqual('nametest', sfile.name())

    def check_decompressed_name(self, filename, expect):
        """Assert that a reader named `filename` reports `expect` as its decompressed name."""
        stream = tutil.MockStreamReader('.', '')
        reader = StreamFileReader(stream, [Range(0, 0, 0)], filename)
        self.assertEqual(expect, reader.decompressed_name())

    def test_decompressed_name_uncompressed_file(self):
        self.check_decompressed_name('test.log', 'test.log')

    def test_decompressed_name_gzip_file(self):
        self.check_decompressed_name('test.log.gz', 'test.log')

    def test_decompressed_name_bz2_file(self):
        self.check_decompressed_name('test.log.bz2', 'test.log')

    def check_decompression(self, compress_ext, compress_func):
        """Check that readall_decompressed() round-trips a compressed sample.

        The sample text is passed through `compress_func`, served from a
        mock stream as a file named 'test.<compress_ext>', and the
        decompressed output is compared with the original text.
        """
        test_text = b'decompression\ntest\n'
        test_data = compress_func(test_text)
        stream = tutil.MockStreamReader('.', test_data)
        reader = StreamFileReader(stream, [Range(0, 0, len(test_data))],
                                  'test.' + compress_ext)
        self.assertEqual(test_text, b''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return `data` gzip-compressed, using an in-memory buffer."""
        compressed_data = io.BytesIO()
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # An extension that is not a compression suffix passes data through.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)

    def test_readline_then_readlines(self):
        # readlines() must continue from where readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        data = reader.readlines()
        self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)

    def test_readline_then_readall(self):
        # readall() must continue from where readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        expected = b''.join([b'two\n', b'\n', b'three\n', b'four\n', b'\n'])
        self.assertEqual(expected, b''.join(reader.readall()))
202
203
class StreamRetryTestMixin(object):
    """Retry-behaviour tests shared by the reader test cases in this module.

    Classes mixing this in must define reader_for(coll_name, **kwargs)
    and read_for_test(reader, size, **kwargs).
    """

    # Collection fixtures, looked up by name in manifest_for().
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        """Return a KeepClient pointed at a proxy URL on the test host, port 1."""
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        """Return the manifest text of the collection fixture `coll_name`."""
        fixture = self.API_COLLECTIONS[coll_name]
        return fixture['manifest_text']

    @tutil.skip_sleep
    def test_success_without_retries(self):
        # A single 200 response: the read succeeds on the first attempt.
        with tutil.mock_keep_responses('bar', 200):
            bar_reader = self.reader_for('bar_file')
            received = self.read_for_test(bar_reader, 3)
            self.assertEqual(b'bar', received)

    @tutil.skip_sleep
    def test_read_no_default_retry(self):
        # With no num_retries given anywhere, a 500 response is an error.
        with tutil.mock_keep_responses('', 500):
            failing_reader = self.reader_for('user_agreement')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, failing_reader, 10)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # num_retries passed when building the reader covers one 500.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file', num_retries=3)
            received = self.read_for_test(foo_reader, 3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # num_retries passed to the read call itself covers one 500.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file')
            received = self.read_for_test(foo_reader, 3, num_retries=3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        # Four 500s in a row are more than num_retries=3 on the reader allows.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file', num_retries=3)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        # Four 500s in a row are more than num_retries=3 on the call allows.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3, num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The reader allows 10 retries but the call asks for only 1, so the
        # three 500 responses must still produce an error.
        with tutil.mock_keep_responses('', 500, 500, 500, 200):
            patient_reader = self.reader_for('user_agreement', num_retries=10)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, patient_reader, 10,
                              num_retries=1)
262
263
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests through StreamReader.readfrom()."""

    def reader_for(self, coll_name, **kwargs):
        """Return a StreamReader built from the tokens of the fixture's manifest."""
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read `byte_count` bytes from offset 0 of the stream."""
        return reader.readfrom(0, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Every manifest line, parsed without a Keep client, must be
        # reproduced unchanged (newline included) by manifest_text().
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            parsed = StreamReader(stream_line.split())
            self.assertEqual(stream_line + '\n', parsed.manifest_text())
277
278
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests through read() on a stream's first file."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file reader of a StreamReader for the fixture."""
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to `byte_count` bytes from the file's current position."""
        return reader.read(byte_count, **kwargs)
286
287
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read `byte_count` bytes starting at offset 0 of the file."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
291
292
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join everything readall() yields; `byte_count` is not used here."""
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
296
297
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join everything readall_decompressed() yields; `byte_count` is not used here."""
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
301
302
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join the lines from readlines() and encode them so the result can
        be compared with the bytes the shared tests expect; `byte_count` is
        not used here."""
        lines = reader.readlines(**kwargs)
        return ''.join(lines).encode()
306
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()