Merge branch '4434-collation' closes #4434
[arvados.git] / sdk / python / tests / test_stream.py
1 #!/usr/bin/env python
2
3 import bz2
4 import gzip
5 import io
6 import mock
7 import os
8 import unittest
9
10 import arvados
11 from arvados import StreamReader, StreamFileReader
12
13 import arvados_testutil as tutil
14 import run_test_server
15
class StreamFileReaderTestCase(unittest.TestCase):
    """Exercise StreamFileReader against in-memory mock streams.

    Two fixtures are used: a "count" reader whose three 3-byte segments
    read back as '123456789', and a "newlines" reader used by the
    line-oriented tests.
    """

    def make_count_reader(self):
        """Return a StreamFileReader whose contents are '123456789'.

        The file is built from three 3-byte segments, one taken from each
        of the mock stream's blocks, so reads can cross block boundaries.
        """
        stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
        return StreamFileReader(stream, [[1, 3, 0], [6, 3, 3], [11, 3, 6]],
                                'count.txt')

    def test_read_returns_first_block(self):
        # read() calls will be aligned on block boundaries - see #3663.
        sfile = self.make_count_reader()
        self.assertEqual('123', sfile.read(10))

    def test_small_read(self):
        # A read shorter than the first segment returns exactly that much.
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))

    def test_successive_reads(self):
        # Each oversized read yields one segment; '' signals end of file.
        sfile = self.make_count_reader()
        for expect in ['123', '456', '789', '']:
            self.assertEqual(expect, sfile.read(10))

    def test_readfrom_spans_blocks(self):
        # Unlike read(), readfrom() crosses segment boundaries and is
        # truncated at end of file.
        sfile = self.make_count_reader()
        self.assertEqual('6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual('2345', sfile.readfrom(1, 4))

    def test_readall(self):
        sfile = self.make_count_reader()
        self.assertEqual('123456789', ''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # With no whence argument, seek() must behave like a relative seek.
        self.test_relative_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        # `args` holds the extra positional arguments passed to seek();
        # it is an immutable tuple so the default is never shared state.
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual('78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual('56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        # `args` holds the extra positional arguments passed to seek();
        # test_one_arg_seek reuses this body with an empty tuple.
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual('56', sfile.read(2))

    def test_end_seek(self):
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual('45', sfile.read(2))

    def test_seek_min_zero(self):
        # Seeking before the start of the file clamps the position to 0.
        sfile = self.make_count_reader()
        sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        # Seeking past the end of the file clamps the position to size().
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        self.assertEqual(9, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_block_read(self):
        # read(5) only returns the 3-byte first segment, so tell() is 3.
        sfile = self.make_count_reader()
        sfile.read(5)
        self.assertEqual(3, sfile.tell())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader is usable as a context manager and closes on exit.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual('12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def make_newlines_reader(self):
        """Return a StreamFileReader over six lines split across two blocks.

        The block boundary falls in the middle of the word 'three', so
        line-oriented reads have to join data from both blocks.
        """
        stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
        return StreamFileReader(stream, [[0, 11, 0], [11, 10, 11]], 'count.txt')

    def check_lines(self, actual):
        """Assert that `actual` is the full list of lines in the newlines file."""
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        # Collect lines until readline() returns an empty result.
        reader = self.make_newlines_reader()
        actual = []
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # A size argument caps the returned data but never reads past a
        # newline.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # With a sizehint, readlines() returns the leading lines but stops
        # before reaching 'three\n'.
        result = self.make_newlines_reader().readlines(8)
        self.assertEqual(['one\n', 'two\n'], result[:2])
        self.assertNotIn('three\n', result)

    def test_name_attribute(self):
        # Test both .name and .name() (for backward compatibility)
        stream = tutil.MockStreamReader()
        sfile = StreamFileReader(stream, [[0, 0, 0]], 'nametest')
        self.assertEqual('nametest', sfile.name)
        self.assertEqual('nametest', sfile.name())

    def check_decompression(self, compress_ext, compress_func):
        """Assert that readall_decompressed() round-trips a known text.

        compress_ext -- extension given to the mock file name
                        ('test.' + compress_ext).
        compress_func -- callable that turns the plain test text into the
                         data stored in the mock stream.
        """
        test_text = 'decompression\ntest\n'
        test_data = compress_func(test_text)
        stream = tutil.MockStreamReader('.', test_data)
        reader = StreamFileReader(stream, [[0, len(test_data), 0]],
                                  'test.' + compress_ext)
        self.assertEqual(test_text, ''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return `data` gzip-compressed, built entirely in memory."""
        compressed_data = io.BytesIO()
        # The GzipFile must be closed before getvalue() so the gzip trailer
        # is flushed into the buffer.
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # An unrecognized extension means the data is passed through as-is.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)
170
171
class StreamRetryTestMixin(object):
    """Retry-behavior tests shared by the stream reader test cases.

    Classes mixing this in must provide the two hooks named below.
    """
    # Define reader_for(coll_name, **kwargs)
    # and read_for_test(reader, size, **kwargs).
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        """Return a KeepClient pointed at a proxy on port 1 of the test host."""
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        """Return the manifest text of the named collection fixture."""
        collection = self.API_COLLECTIONS[coll_name]
        return collection['manifest_text']

    @tutil.skip_sleep
    def test_success_without_retries(self):
        # A single 200 response is enough to satisfy the read.
        reader = self.reader_for('bar_file')
        with tutil.mock_get_responses('bar', 200):
            data = self.read_for_test(reader, 3)
            self.assertEqual('bar', data)

    @tutil.skip_sleep
    def test_read_no_default_retry(self):
        # Without num_retries, one 500 response must surface as an error.
        reader = self.reader_for('user_agreement')
        with tutil.mock_get_responses('', 500), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(reader, 10)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # num_retries given when the reader is built covers the first 500.
        reader = self.reader_for('foo_file', num_retries=3)
        with tutil.mock_get_responses('foo', 500, 200):
            data = self.read_for_test(reader, 3)
            self.assertEqual('foo', data)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # num_retries given on the read call covers the first 500.
        reader = self.reader_for('foo_file')
        with tutil.mock_get_responses('foo', 500, 200):
            data = self.read_for_test(reader, 3, num_retries=3)
            self.assertEqual('foo', data)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        # Four 500s in a row must fail a reader built with num_retries=3.
        reader = self.reader_for('bar_file', num_retries=3)
        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(reader, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        # Four 500s in a row must fail a read call made with num_retries=3.
        reader = self.reader_for('bar_file')
        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(reader, 3, num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The per-call num_retries=1 must win over the reader's
        # num_retries=10, so three 500s are enough to fail the read.
        reader = self.reader_for('user_agreement', num_retries=10)
        with tutil.mock_get_responses('', 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(reader, 10, num_retries=1)
230
231
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests directly against a StreamReader."""

    def reader_for(self, coll_name, **kwargs):
        """Build a StreamReader from the named fixture's manifest tokens."""
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read `byte_count` bytes from the start of the stream."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Each manifest line must round-trip through a StreamReader that
        # was constructed without any Keep client.
        mtext = self.manifest_for('multilevel_collection_1')
        stream_lines = mtext.rstrip('\n').split('\n')
        for line in stream_lines:
            expected = line + '\n'
            reader = StreamReader(line.split())
            self.assertEqual(expected, reader.manifest_text())
245
246
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests through a file reader's read()."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file reader of the named fixture's stream."""
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to `byte_count` bytes with the reader's read() method."""
        data = reader.read(byte_count, **kwargs)
        return data
254
255
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Run the inherited retry tests through readfrom() at offset 0."""

    def read_for_test(self, reader, byte_count, **kwargs):
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
259
260
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Run the inherited retry tests through readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is ignored: readall() always yields the whole file.
        chunks = reader.readall(**kwargs)
        return ''.join(chunks)
264
265
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Run the inherited retry tests through readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is ignored: the whole file is read and joined.
        chunks = reader.readall_decompressed(**kwargs)
        return ''.join(chunks)
269
270
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Run the inherited retry tests through readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is ignored: every line is read and joined back together.
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
274
275
# Run every test case in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()