11308: Futurize stage2.
[arvados.git] / sdk / python / tests / test_stream.py
1 #!/usr/bin/env python
2
3 from __future__ import absolute_import
4 from builtins import object
5 import bz2
6 import gzip
7 import io
8 import mock
9 import os
10 import unittest
11 import hashlib
12
13 import arvados
14 from arvados import StreamReader, StreamFileReader
15 from arvados._ranges import Range
16
17 from . import arvados_testutil as tutil
18 from . import run_test_server
19
class StreamFileReaderTestCase(unittest.TestCase):
    """Tests for StreamFileReader built on top of tutil.MockStreamReader.

    Every fixture is assembled from literal strings handed to
    tutil.MockStreamReader, plus a list of Range objects and a file name.
    """

    def make_count_reader(self):
        # Reader named 'count.txt' over three mock blocks.  The tests below
        # expect its contents to be '123456789' (see test_readall/test_size).
        # NOTE(review): the Range arguments presumably select three bytes
        # from each block -- confirm against arvados._ranges.Range.
        stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
        return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
                                'count.txt')

    def test_read_block_crossing_behavior(self):
        # read() calls will be aligned on block boundaries - see #3663.
        sfile = self.make_count_reader()
        self.assertEqual('123', sfile.read(10))

    def test_small_read(self):
        # A read smaller than the first segment returns exactly that many
        # characters.
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))

    def test_successive_reads(self):
        # Each read(10) yields one three-character piece, then '' once the
        # file is exhausted.
        sfile = self.make_count_reader()
        for expect in ['123', '456', '789', '']:
            self.assertEqual(expect, sfile.read(10))

    def test_readfrom_spans_blocks(self):
        # Unlike read(), readfrom() is expected to return data that crosses
        # segment boundaries, and to stop at the end of the file.
        sfile = self.make_count_reader()
        self.assertEqual('6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual('2345', sfile.readfrom(1, 4))

    def test_readall(self):
        # readall() returns an iterable of pieces; joined they make up the
        # whole file.
        sfile = self.make_count_reader()
        self.assertEqual('123456789', ''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # Calling seek() with no whence argument must give the same results
        # as an explicit os.SEEK_SET.
        self.test_absolute_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        # `args` holds the extra positional arguments passed to seek() after
        # the offset.  It is a tuple rather than a list so the default value
        # cannot be mutated and shared between calls.
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual('78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual('56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        # `args` holds the extra positional arguments passed to seek() after
        # the offset (an immutable tuple, see test_absolute_seek).
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual('56', sfile.read(2))

    def test_end_seek(self):
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual('45', sfile.read(2))

    def test_seek_min_zero(self):
        # Seeking before the start of the file leaves the position at 0.
        sfile = self.make_count_reader()
        sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        # Seeking past the end of the file leaves the position at the file
        # size (9).
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        self.assertEqual(9, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_block_read(self):
        # read(5) only advances by 3, matching the block-aligned behavior
        # checked in test_read_block_crossing_behavior.
        sfile = self.make_count_reader()
        sfile.read(5)
        self.assertEqual(3, sfile.tell())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        # Reading from a closed reader must raise ValueError.
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader works as a context manager: open inside the block,
        # closed once the block exits.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual('12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def make_newlines_reader(self):
        # Reader over two mock blocks whose boundary falls in the middle of
        # the line 'three\n', so line-oriented reads must join data from
        # both blocks.
        stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
        return StreamFileReader(stream, [Range(0, 0, 11), Range(11, 11, 10)], 'count.txt')

    def check_lines(self, actual):
        # Assert that `actual` is the full list of lines held by the reader
        # from make_newlines_reader(), including the empty lines.
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        # Collect lines with readline() until it returns a falsy value.
        reader = self.make_newlines_reader()
        actual = []
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # readline(size) returns at most `size` characters and never reads
        # past the end of the current line.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # With a size hint of 8 the result starts with the first two lines
        # and must not reach 'three\n'.
        result = self.make_newlines_reader().readlines(8)
        self.assertEqual(['one\n', 'two\n'], result[:2])
        self.assertNotIn('three\n', result)

    def test_name_attribute(self):
        # Test both .name and .name() (for backward compatibility)
        stream = tutil.MockStreamReader()
        sfile = StreamFileReader(stream, [Range(0, 0, 0)], 'nametest')
        self.assertEqual('nametest', sfile.name)
        self.assertEqual('nametest', sfile.name())

    def check_decompressed_name(self, filename, expect):
        # Assert that a reader created with the name `filename` reports
        # `expect` from decompressed_name().
        stream = tutil.MockStreamReader('.', '')
        reader = StreamFileReader(stream, [Range(0, 0, 0)], filename)
        self.assertEqual(expect, reader.decompressed_name())

    def test_decompressed_name_uncompressed_file(self):
        self.check_decompressed_name('test.log', 'test.log')

    def test_decompressed_name_gzip_file(self):
        self.check_decompressed_name('test.log.gz', 'test.log')

    def test_decompressed_name_bz2_file(self):
        self.check_decompressed_name('test.log.bz2', 'test.log')

    def check_decompression(self, compress_ext, compress_func):
        # Compress a fixed text with `compress_func`, serve it as a file
        # named 'test.<compress_ext>', and assert that
        # readall_decompressed() gives the original text back.
        test_text = 'decompression\ntest\n'
        test_data = compress_func(test_text)
        stream = tutil.MockStreamReader('.', test_data)
        reader = StreamFileReader(stream, [Range(0, 0, len(test_data))],
                                  'test.' + compress_ext)
        self.assertEqual(test_text, ''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return `data` gzip-compressed, built in an in-memory buffer."""
        compressed_data = io.BytesIO()
        # The GzipFile must be closed (end of the with block) before the
        # buffer is read, otherwise the gzip trailer is missing.
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # An identity "compressor" with a '.log' extension: the data must
        # come back unchanged.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)

    def test_readline_then_readlines(self):
        # readlines() continues from where the preceding readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        data = reader.readlines()
        self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)

    def test_readline_then_readall(self):
        # readall() continues from where the preceding readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        self.assertEqual(''.join(['two\n', '\n', 'three\n', 'four\n', '\n']), ''.join(reader.readall()))
201
202
class StreamRetryTestMixin(object):
    """Retry-behavior tests shared by the stream reader test cases.

    Classes using this mixin must define reader_for(coll_name, **kwargs)
    and read_for_test(reader, size, **kwargs).
    """
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        # Build a KeepClient pointed at a proxy URL on port 1 of the test
        # host, with an empty local_store.
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        # Look up the manifest text of the named collection fixture.
        collection = self.API_COLLECTIONS[coll_name]
        return collection['manifest_text']

    # NOTE(review): tutil.mock_keep_responses(body, *codes) presumably
    # answers successive Keep requests with the given status codes in
    # order -- confirm in arvados_testutil.

    @tutil.skip_sleep
    def test_success_without_retries(self):
        with tutil.mock_keep_responses('bar', 200):
            bar_reader = self.reader_for('bar_file')
            data = self.read_for_test(bar_reader, 3)
            self.assertEqual('bar', data)

    @tutil.skip_sleep
    def test_read_no_default_retry(self):
        with tutil.mock_keep_responses('', 500):
            failing_reader = self.reader_for('user_agreement')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, failing_reader, 10)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # num_retries is given when the reader is created.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file', num_retries=3)
            data = self.read_for_test(foo_reader, 3)
            self.assertEqual('foo', data)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # num_retries is given on the read call itself.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file')
            data = self.read_for_test(foo_reader, 3, num_retries=3)
            self.assertEqual('foo', data)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file', num_retries=3)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3,
                              num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The reader is created with num_retries=10 but the read call asks
        # for num_retries=1; the read is expected to fail.
        with tutil.mock_keep_responses('', 500, 500, 500, 200):
            patient_reader = self.reader_for('user_agreement', num_retries=10)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, patient_reader, 10,
                              num_retries=1)
261
262
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against StreamReader.readfrom()."""

    def reader_for(self, coll_name, **kwargs):
        # A StreamReader is built from the whitespace-separated tokens of
        # the collection's manifest text.
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        start_offset = 0
        return reader.readfrom(start_offset, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Each manifest line, parsed on its own without a Keep client, must
        # round-trip through manifest_text() unchanged.
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            reader = StreamReader(stream_line.split())
            self.assertEqual(stream_line + '\n', reader.manifest_text())
276
277
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against read() on a stream's first file."""

    def reader_for(self, coll_name, **kwargs):
        # Build the stream from the manifest tokens, then hand back the
        # first file reader it exposes.
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        data = reader.read(byte_count, **kwargs)
        return data
285
286
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Same tests as the parent class, reading through readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # Always read from the beginning of the file.
        start_offset = 0
        return reader.readfrom(start_offset, byte_count, **kwargs)
290
291
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Same tests as the parent class, reading through readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: readall() yields the whole file in
        # pieces, which are joined into one string.
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
295
296
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Same tests as the parent class, via readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: every piece produced by
        # readall_decompressed() is joined into one string.
        pieces = reader.readall_decompressed(**kwargs)
        return ''.join(pieces)
300
301
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Same tests as the parent class, reading through readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is not used: all lines returned by readlines() are
        # joined back into one string.
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
305
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()