21700: Install Bundler system-wide in Rails postinst
[arvados.git] / sdk / python / tests / test_stream.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 from builtins import object
7 import bz2
8 import gzip
9 import io
10 import os
11 import unittest
12 import hashlib
13
14 from unittest import mock
15
16 import arvados
17 from arvados import StreamReader, StreamFileReader
18 from arvados._ranges import Range
19
20 from . import arvados_testutil as tutil
21 from . import run_test_server
22
class StreamFileReaderTestCase(unittest.TestCase):
    """Exercise StreamFileReader against in-memory mock streams.

    Two fixtures are used throughout:

    * the "count" reader, whose content reads back as b'123456789' and is
      delivered in three 3-byte pieces (see test_successive_reads);
    * the "newlines" reader, which holds six short lines split across two
      mock blocks (see check_lines).
    """

    def make_count_reader(self):
        """Return a StreamFileReader over three mock blocks reading b'123456789'."""
        stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
        return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
                                'count.txt')

    def test_read_block_crossing_behavior(self):
        # read() calls will be aligned on block boundaries - see #3663.
        sfile = self.make_count_reader()
        self.assertEqual(b'123', sfile.read(10))

    def test_small_read(self):
        # A read shorter than the first segment returns exactly that many bytes.
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))

    def test_successive_reads(self):
        # Each oversized read yields one 3-byte segment; the final one is empty.
        sfile = self.make_count_reader()
        for expect in [b'123', b'456', b'789', b'']:
            self.assertEqual(expect, sfile.read(10))

    def test_readfrom_spans_blocks(self):
        # Unlike read(), readfrom() crosses segment boundaries; asking for
        # more than remains returns only the 4 bytes left after offset 5.
        sfile = self.make_count_reader()
        self.assertEqual(b'6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'2345', sfile.readfrom(1, 4))

    def test_readall(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'123456789', b''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # Re-run the absolute seek checks without passing a whence argument.
        self.test_absolute_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        # `args` holds the extra positional arguments forwarded to seek().
        # It is an immutable tuple so the default cannot be shared and
        # mutated between calls.
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual(b'78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        # After reading 2 bytes, skipping 2 more lands on offset 4.
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_end_seek(self):
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual(b'45', sfile.read(2))

    def test_seek_min_zero(self):
        # A negative absolute offset is rejected and leaves the position alone.
        sfile = self.make_count_reader()
        self.assertEqual(0, sfile.tell())
        with self.assertRaises(IOError):
            sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        # POSIX permits seeking past end of file.
        self.assertEqual(11, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_block_read(self):
        # read(5) only delivers the first 3-byte segment, so tell() is 3.
        sfile = self.make_count_reader()
        sfile.read(5)
        self.assertEqual(3, sfile.tell())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader works as a context manager and is closed on exit.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual(b'12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def make_newlines_reader(self):
        """Return a StreamFileReader over two mock blocks holding six lines.

        The word "three" is deliberately split across the two blocks.
        """
        stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
        return StreamFileReader(stream, [Range(0, 0, 11), Range(11, 11, 10)], 'count.txt')

    def check_lines(self, actual):
        """Assert that `actual` is the full list of lines in the newlines fixture."""
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        # Collect lines until readline() returns an empty value at EOF.
        reader = self.make_newlines_reader()
        actual = []
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # The size argument caps the returned text, but a line still ends
        # at its newline when that comes first.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # With a size hint of 8 the result starts with the first two lines
        # and must stop before 'three\n'.
        result = self.make_newlines_reader().readlines(8)
        self.assertEqual(['one\n', 'two\n'], result[:2])
        self.assertNotIn('three\n', result)

    def test_name_attribute(self):
        # Test both .name and .name() (for backward compatibility)
        stream = tutil.MockStreamReader()
        sfile = StreamFileReader(stream, [Range(0, 0, 0)], 'nametest')
        self.assertEqual('nametest', sfile.name)
        self.assertEqual('nametest', sfile.name())

    def check_decompressed_name(self, filename, expect):
        """Assert that a reader named `filename` reports `expect` as its decompressed name."""
        stream = tutil.MockStreamReader('.', '')
        reader = StreamFileReader(stream, [Range(0, 0, 0)], filename)
        self.assertEqual(expect, reader.decompressed_name())

    def test_decompressed_name_uncompressed_file(self):
        self.check_decompressed_name('test.log', 'test.log')

    def test_decompressed_name_gzip_file(self):
        self.check_decompressed_name('test.log.gz', 'test.log')

    def test_decompressed_name_bz2_file(self):
        self.check_decompressed_name('test.log.bz2', 'test.log')

    def check_decompression(self, compress_ext, compress_func):
        """Round-trip a fixed payload through `compress_func` and the reader.

        The payload is compressed with `compress_func`, served from a mock
        stream as a file named 'test.<compress_ext>', and must come back
        unchanged from readall_decompressed().
        """
        test_text = b'decompression\ntest\n'
        test_data = compress_func(test_text)
        stream = tutil.MockStreamReader('.', test_data)
        reader = StreamFileReader(stream, [Range(0, 0, len(test_data))],
                                  'test.' + compress_ext)
        self.assertEqual(test_text, b''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return `data` (bytes) compressed as a complete gzip stream."""
        compressed_data = io.BytesIO()
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # An identity "compressor" with a non-compressed extension.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)

    def test_readline_then_readlines(self):
        # readlines() must continue from where readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        data = reader.readlines()
        self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)

    def test_readline_then_readall(self):
        # readall() must continue from where readline() stopped.
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        self.assertEqual(b''.join([b'two\n', b'\n', b'three\n', b'four\n', b'\n']), b''.join(reader.readall()))
207
208
class StreamRetryTestMixin(object):
    """Retry-behaviour tests shared by the stream reader test cases.

    A concrete test case mixing this in must define two hooks:

    * reader_for(coll_name, **kwargs)
    * read_for_test(reader, size, **kwargs)
    """
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        """Return a KeepClient built with a test-host proxy URL and an empty local_store."""
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        """Return the manifest text of the collection fixture named `coll_name`."""
        fixture = self.API_COLLECTIONS[coll_name]
        return fixture['manifest_text']

    @tutil.skip_sleep
    def test_success_without_retries(self):
        # The mock is given a single 200 status; no retry settings are used.
        with tutil.mock_keep_responses('bar', 200):
            bar_reader = self.reader_for('bar_file')
            received = self.read_for_test(bar_reader, 3)
            self.assertEqual(b'bar', received)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # The mock is given 500 then 200; num_retries is set on the reader.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file', num_retries=3)
            received = self.read_for_test(foo_reader, 3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # The mock is given 500 then 200; num_retries is set on the read call.
        with tutil.mock_keep_responses('foo', 500, 200):
            foo_reader = self.reader_for('foo_file')
            received = self.read_for_test(foo_reader, 3, num_retries=3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        # Four 500 statuses precede the 200, so a reader limited to
        # num_retries=3 is expected to give up with KeepReadError.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file', num_retries=3)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        # Same as above, but the retry limit is passed to the read call.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            bar_reader = self.reader_for('bar_file')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, bar_reader, 3, num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The per-call num_retries=1 must win over the reader's
        # num_retries=10: the read is expected to fail even though a 200
        # status is queued after three 500s.
        with tutil.mock_keep_responses('', 500, 500, 500, 200):
            agreement_reader = self.reader_for('user_agreement', num_retries=10)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, agreement_reader, 10,
                              num_retries=1)
260
261
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests through StreamReader.readfrom()."""

    def reader_for(self, coll_name, **kwargs):
        """Build a StreamReader from the whitespace-split fixture manifest."""
        tokens = self.manifest_for(coll_name).split()
        client = self.keep_client()
        return StreamReader(tokens, client, **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read `byte_count` bytes from offset 0 of `reader`."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Each manifest line, parsed on its own without a Keep client, must
        # serialise back to the same line plus a trailing newline.
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            parsed = StreamReader(stream_line.split())
            self.assertEqual(stream_line + '\n', parsed.manifest_text())
275
276
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests through read() on a stream's first file."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file reader of a StreamReader built from the fixture manifest."""
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        files = stream.all_files()
        return files[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to `byte_count` bytes from the reader's current position."""
        data = reader.read(byte_count, **kwargs)
        return data
284
285
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readfrom()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read `byte_count` bytes starting at offset 0."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
289
290
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join everything readall() yields; `byte_count` is not used."""
        chunks = reader.readall(**kwargs)
        return b''.join(chunks)
294
295
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join everything readall_decompressed() yields; `byte_count` is not used."""
        chunks = reader.readall_decompressed(**kwargs)
        return b''.join(chunks)
299
300
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Run the shared retry tests through the file reader's readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        """Join the text lines from readlines() and encode them; `byte_count` is not used."""
        lines = reader.readlines(**kwargs)
        text = ''.join(lines)
        # The shared tests compare against bytes, so encode the joined text.
        return text.encode()
304
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()