21891: Remove unused sdk/go/manifest package and ManifestFileReader.
[arvados.git] / sdk / python / tests / test_stream.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import bz2
6 import gzip
7 import io
8 import os
9 import unittest
10 import hashlib
11
12 from unittest import mock
13
14 import arvados
15 from arvados._ranges import Range
16
17 from . import arvados_testutil as tutil
18 from . import run_test_server
19
class StreamFileReaderTestMixin(object):
    # Reader tests shared by every test case that mixes this class in.
    # The host class must define:
    #   make_count_reader(): a reader whose content is b'123456789'.
    #   make_newlines_reader(): a reader whose lines are the ones listed
    #     in check_lines().
    #   make_file_reader(name=..., data=...): a reader with the given
    #     name; data is only passed by check_decompression().

    def test_read_block_crossing_behavior(self):
        # read() calls will be aligned on block boundaries - see #3663.
        # With the count reader, a 10-byte request is expected to return
        # only b'123'.
        sfile = self.make_count_reader()
        self.assertEqual(b'123', sfile.read(10))

    def test_small_read(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))

    def test_successive_reads(self):
        # Consecutive reads continue where the previous one stopped, and
        # a read at end of file returns an empty bytes object.
        sfile = self.make_count_reader()
        for expect in [b'1234', b'5678', b'9', b'']:
            self.assertEqual(expect, sfile.read(4))

    def test_readfrom_spans_blocks(self):
        # Asking for 12 bytes from offset 5 returns just the 4 remaining.
        sfile = self.make_count_reader()
        self.assertEqual(b'6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual(b'2345', sfile.readfrom(1, 4))

    def test_readall(self):
        # readall() yields chunks; joined they are the whole content.
        sfile = self.make_count_reader()
        self.assertEqual(b'123456789', b''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # Re-run the absolute seek checks calling seek() with the offset
        # only, i.e. without a whence argument.
        self.test_absolute_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        # args holds the extra positional arguments passed to seek() after
        # the offset.  It is a tuple so the default cannot be mutated
        # between calls.
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual(b'78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        # After reading 2 bytes, skipping 2 more lands on offset 4.
        sfile = self.make_count_reader()
        self.assertEqual(b'12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual(b'56', sfile.read(2))

    def test_end_seek(self):
        # 6 bytes before the end of the 9-byte content is offset 3.
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual(b'45', sfile.read(2))

    def test_seek_min_zero(self):
        # A negative absolute offset must raise IOError and leave the
        # position untouched.
        sfile = self.make_count_reader()
        self.assertEqual(0, sfile.tell())
        with self.assertRaises(IOError):
            sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        # POSIX permits seeking past end of file.
        self.assertEqual(11, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader is usable inside the with block and closed afterwards.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual(b'12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def check_lines(self, actual):
        """Assert that `actual` is the full list of lines expected from
        make_newlines_reader()."""
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        # Collect lines until readline() returns a falsy value.
        reader = self.make_newlines_reader()
        actual = []
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # The size argument caps the returned length, but readline() still
        # stops at a newline that comes earlier.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # Every line is expected back even with a size hint of 8.
        result = self.make_newlines_reader().readlines(8)
        self.check_lines(result)

    def test_name_attribute(self):
        sfile = self.make_file_reader(name='nametest')
        self.assertEqual('nametest', sfile.name)

    def check_decompressed_name(self, filename, expect):
        """Assert that a reader named `filename` reports `expect` from
        decompressed_name()."""
        reader = self.make_file_reader(name=filename)
        self.assertEqual(expect, reader.decompressed_name())

    def test_decompressed_name_uncompressed_file(self):
        self.check_decompressed_name('test.log', 'test.log')

    def test_decompressed_name_gzip_file(self):
        self.check_decompressed_name('test.log.gz', 'test.log')

    def test_decompressed_name_bz2_file(self):
        self.check_decompressed_name('test.log.bz2', 'test.log')

    def check_decompression(self, compress_ext, compress_func):
        """Check readall_decompressed() against data made by compress_func.

        The sample text is passed through `compress_func`, handed to a
        reader named 'test.<compress_ext>', and the joined output of
        readall_decompressed() must equal the sample text.
        """
        test_text = b'decompression\ntest\n'
        test_data = compress_func(test_text)
        reader = self.make_file_reader(name='test.'+compress_ext, data=test_data)
        self.assertEqual(test_text, b''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return `data` (bytes) compressed as a gzip stream, built in
        memory."""
        compressed_data = io.BytesIO()
        # Leaving the with block closes the GzipFile, which finishes the
        # stream before the buffer is read back.
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # Identity "compression": the reader gets the plain text.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)

    def test_readline_then_readlines(self):
        # readlines() picks up after the line consumed by readline().
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        data = reader.readlines()
        self.assertEqual(['two\n', '\n', 'three\n', 'four\n', '\n'], data)

    def test_readline_then_readall(self):
        # readall() picks up after the line consumed by readline().
        reader = self.make_newlines_reader()
        data = reader.readline()
        self.assertEqual('one\n', data)
        self.assertEqual(b''.join([b'two\n', b'\n', b'three\n', b'four\n', b'\n']), b''.join(reader.readall()))
183
184
class StreamRetryTestMixin(object):
    # Retry tests shared by every test case that mixes this class in.
    # The host class must define reader_for(coll_name, **kwargs)
    # and read_for_test(reader, size, **kwargs).
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        # KeepClient that uses port 1 on the test host as its proxy and an
        # empty local_store.
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        # Manifest text of the named entry in the collections fixture.
        collection = self.API_COLLECTIONS[coll_name]
        return collection['manifest_text']

    # In the tests below the arguments after the first one given to
    # mock_keep_responses are presumably the status codes of successive
    # mocked Keep responses - confirm in arvados_testutil.

    @tutil.skip_sleep
    def test_success_without_retries(self):
        with tutil.mock_keep_responses('bar', 200):
            stream = self.reader_for('bar_file')
            received = self.read_for_test(stream, 3)
            self.assertEqual(b'bar', received)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # num_retries is given when the reader is created.
        with tutil.mock_keep_responses('foo', 500, 200):
            stream = self.reader_for('foo_file', num_retries=3)
            received = self.read_for_test(stream, 3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # num_retries is given on the read call instead.
        with tutil.mock_keep_responses('foo', 500, 200):
            stream = self.reader_for('foo_file')
            received = self.read_for_test(stream, 3, num_retries=3)
            self.assertEqual(b'foo', received)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        # Four 500s precede the 200, so 3 retries must not be enough.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            stream = self.reader_for('bar_file', num_retries=3)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, stream, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        # Same as above with num_retries given on the read call.
        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
            stream = self.reader_for('bar_file')
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, stream, 3, num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The reader allows 10 retries but the read call only 1; the read
        # is expected to fail, i.e. the per-call value wins.
        with tutil.mock_keep_responses('', 500, 500, 500, 200):
            stream = self.reader_for('user_agreement', num_retries=10)
            self.assertRaises(arvados.errors.KeepReadError,
                              self.read_for_test, stream, 10, num_retries=1)
236
237
# Run this module's tests when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()