3198: Refactoring. Added Range object instead of fiddling with arrays, should
[arvados.git] / sdk / python / tests / test_stream.py
1 #!/usr/bin/env python
2
3 import bz2
4 import gzip
5 import io
6 import mock
7 import os
8 import unittest
9 import hashlib
10
11 import arvados
12 from arvados import StreamReader, StreamFileReader, StreamWriter, StreamFileWriter
13
14 import arvados_testutil as tutil
15 import run_test_server
16
class StreamFileReaderTestCase(unittest.TestCase):
    """Tests for StreamFileReader reads, seeks, line handling and decompression.

    Readers are built on tutil.MockStreamReader fixtures created by the
    make_*_reader helpers below.
    """

    def make_count_reader(self):
        """Return a 'count.txt' reader over the mock blocks '01234', '34567', '67890'.

        The three segments pick three bytes out of each block, so the file
        content is '123456789' (see test_readall).
        """
        stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
        # NOTE(review): each segment looks like
        # [stream offset, length, file offset] -- confirm against StreamFileReader.
        return StreamFileReader(stream, [[1, 3, 0], [6, 3, 3], [11, 3, 6]],
                                'count.txt')

    def test_read_returns_first_block(self):
        # read() calls will be aligned on block boundaries - see #3663.
        sfile = self.make_count_reader()
        self.assertEqual('123', sfile.read(10))

    def test_small_read(self):
        # A read shorter than the first segment returns exactly that many bytes.
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))

    def test_successive_reads(self):
        # Each read() yields one segment; the final read returns ''.
        sfile = self.make_count_reader()
        for expect in ['123', '456', '789', '']:
            self.assertEqual(expect, sfile.read(10))

    def test_readfrom_spans_blocks(self):
        # Unlike read(), readfrom() may cross segment boundaries.
        sfile = self.make_count_reader()
        self.assertEqual('6789', sfile.readfrom(5, 12))

    def test_small_readfrom_spanning_blocks(self):
        sfile = self.make_count_reader()
        self.assertEqual('2345', sfile.readfrom(1, 4))

    def test_readall(self):
        sfile = self.make_count_reader()
        self.assertEqual('123456789', ''.join(sfile.readall()))

    def test_one_arg_seek(self):
        # seek() without a whence argument must behave like the relative
        # seek test, so reuse it with no extra arguments.
        self.test_relative_seek(())

    def test_absolute_seek(self, args=(os.SEEK_SET,)):
        """Check seeking to absolute positions.

        args holds the extra positional arguments passed to seek() after
        the offset.  The default is a tuple rather than a list so the
        shared default object can never be mutated between calls.
        """
        sfile = self.make_count_reader()
        sfile.seek(6, *args)
        self.assertEqual('78', sfile.read(2))
        sfile.seek(4, *args)
        self.assertEqual('56', sfile.read(2))

    def test_relative_seek(self, args=(os.SEEK_CUR,)):
        """Check seeking relative to the current position.

        args holds the extra positional arguments passed to seek() after
        the offset; test_one_arg_seek passes an empty sequence.  The
        default is an immutable tuple (see test_absolute_seek).
        """
        sfile = self.make_count_reader()
        self.assertEqual('12', sfile.read(2))
        sfile.seek(2, *args)
        self.assertEqual('56', sfile.read(2))

    def test_end_seek(self):
        # Negative offsets with SEEK_END count back from the end of the file.
        sfile = self.make_count_reader()
        sfile.seek(-6, os.SEEK_END)
        self.assertEqual('45', sfile.read(2))

    def test_seek_min_zero(self):
        # Seeking before the start leaves the position at 0.
        sfile = self.make_count_reader()
        sfile.seek(-2, os.SEEK_SET)
        self.assertEqual(0, sfile.tell())

    def test_seek_max_size(self):
        # Seeking past the end leaves the position at the file size.
        sfile = self.make_count_reader()
        sfile.seek(2, os.SEEK_END)
        self.assertEqual(9, sfile.tell())

    def test_size(self):
        self.assertEqual(9, self.make_count_reader().size())

    def test_tell_after_block_read(self):
        # read(5) only returns the first 3-byte segment, so tell() is 3.
        sfile = self.make_count_reader()
        sfile.read(5)
        self.assertEqual(3, sfile.tell())

    def test_tell_after_small_read(self):
        sfile = self.make_count_reader()
        sfile.read(1)
        self.assertEqual(1, sfile.tell())

    def test_no_read_after_close(self):
        sfile = self.make_count_reader()
        sfile.close()
        self.assertRaises(ValueError, sfile.read, 2)

    def test_context(self):
        # The reader is usable as a context manager and closes on exit.
        with self.make_count_reader() as sfile:
            self.assertFalse(sfile.closed, "reader is closed inside context")
            self.assertEqual('12', sfile.read(2))
        self.assertTrue(sfile.closed, "reader is open after context")

    def make_newlines_reader(self):
        """Return a reader whose text has a line ('three') split across two blocks."""
        stream = tutil.MockStreamReader('.', 'one\ntwo\n\nth', 'ree\nfour\n\n')
        return StreamFileReader(stream, [[0, 11, 0], [11, 10, 11]], 'count.txt')

    def check_lines(self, actual):
        """Assert that actual is the list of lines served by make_newlines_reader()."""
        self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
                         actual)

    def test_readline(self):
        reader = self.make_newlines_reader()
        actual = []
        # Collect lines until readline() returns an empty (falsy) result.
        while True:
            data = reader.readline()
            if not data:
                break
            actual.append(data)
        self.check_lines(actual)

    def test_readlines(self):
        self.check_lines(self.make_newlines_reader().readlines())

    def test_iteration(self):
        self.check_lines(list(iter(self.make_newlines_reader())))

    def test_readline_size(self):
        # readline(size) stops at the newline or after size bytes,
        # whichever comes first.
        reader = self.make_newlines_reader()
        self.assertEqual('on', reader.readline(2))
        self.assertEqual('e\n', reader.readline(4))
        self.assertEqual('two\n', reader.readline(6))
        self.assertEqual('\n', reader.readline(8))
        self.assertEqual('thre', reader.readline(4))

    def test_readlines_sizehint(self):
        # With a size hint of 8 the result starts with the first two lines
        # and must not reach 'three\n'.
        result = self.make_newlines_reader().readlines(8)
        self.assertEqual(['one\n', 'two\n'], result[:2])
        self.assertNotIn('three\n', result)

    def test_name_attribute(self):
        # Test both .name and .name() (for backward compatibility)
        stream = tutil.MockStreamReader()
        sfile = StreamFileReader(stream, [[0, 0, 0]], 'nametest')
        self.assertEqual('nametest', sfile.name)
        self.assertEqual('nametest', sfile.name())

    def check_decompression(self, compress_ext, compress_func):
        """Assert that readall_decompressed() recovers text from a compressed file.

        compress_ext is the extension given to the mock file ('test.<ext>');
        compress_func turns the plain test text into the stored block data.
        """
        test_text = 'decompression\ntest\n'
        test_data = compress_func(test_text)
        stream = tutil.MockStreamReader('.', test_data)
        reader = StreamFileReader(stream, [[0, len(test_data), 0]],
                                  'test.' + compress_ext)
        self.assertEqual(test_text, ''.join(reader.readall_decompressed()))

    @staticmethod
    def gzip_compress(data):
        """Return data gzip-compressed entirely in memory."""
        compressed_data = io.BytesIO()
        with gzip.GzipFile(fileobj=compressed_data, mode='wb') as gzip_file:
            gzip_file.write(data)
        return compressed_data.getvalue()

    def test_no_decompression(self):
        # A '.log' file is stored as-is, so the identity function "compresses" it.
        self.check_decompression('log', lambda s: s)

    def test_gzip_decompression(self):
        self.check_decompression('gz', self.gzip_compress)

    def test_bz2_decompression(self):
        self.check_decompression('bz2', bz2.compress)
171
172
class StreamRetryTestMixin(object):
    """Retry-behaviour tests shared by the stream reader test cases.

    Classes using this mixin must define reader_for(coll_name, **kwargs)
    and read_for_test(reader, size, **kwargs).
    """
    API_COLLECTIONS = run_test_server.fixture('collections')

    def keep_client(self):
        """Return a KeepClient configured with a proxy URL on the test host."""
        proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST,)
        return arvados.KeepClient(proxy=proxy_url, local_store='')

    def manifest_for(self, coll_name):
        """Return the manifest text of the named collection fixture."""
        collection = self.API_COLLECTIONS[coll_name]
        return collection['manifest_text']

    @tutil.skip_sleep
    def test_success_without_retries(self):
        bar_reader = self.reader_for('bar_file')
        with tutil.mock_get_responses('bar', 200):
            content = self.read_for_test(bar_reader, 3)
            self.assertEqual('bar', content)

    @tutil.skip_sleep
    def test_read_no_default_retry(self):
        # A single 500 response must surface as KeepReadError when no
        # retry count was requested.
        agreement_reader = self.reader_for('user_agreement')
        with tutil.mock_get_responses('', 500), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(agreement_reader, 10)

    @tutil.skip_sleep
    def test_read_with_instance_retries(self):
        # Retry count supplied when the reader is built.
        foo_reader = self.reader_for('foo_file', num_retries=3)
        with tutil.mock_get_responses('foo', 500, 200):
            content = self.read_for_test(foo_reader, 3)
            self.assertEqual('foo', content)

    @tutil.skip_sleep
    def test_read_with_method_retries(self):
        # Retry count supplied on the read call itself.
        foo_reader = self.reader_for('foo_file')
        with tutil.mock_get_responses('foo', 500, 200):
            content = self.read_for_test(foo_reader, 3, num_retries=3)
            self.assertEqual('foo', content)

    @tutil.skip_sleep
    def test_read_instance_retries_exhausted(self):
        # Four 500 responses come before the 200; three retries must fail.
        bar_reader = self.reader_for('bar_file', num_retries=3)
        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(bar_reader, 3)

    @tutil.skip_sleep
    def test_read_method_retries_exhausted(self):
        bar_reader = self.reader_for('bar_file')
        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(bar_reader, 3, num_retries=3)

    @tutil.skip_sleep
    def test_method_retries_take_precedence(self):
        # The per-call num_retries=1 must win over the reader's num_retries=10,
        # so three 500 responses are enough to fail the read.
        agreement_reader = self.reader_for('user_agreement', num_retries=10)
        with tutil.mock_get_responses('', 500, 500, 500, 200), \
                self.assertRaises(arvados.errors.KeepReadError):
            self.read_for_test(agreement_reader, 10, num_retries=1)
231
232
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against StreamReader.readfrom()."""

    def reader_for(self, coll_name, **kwargs):
        """Build a StreamReader from the whitespace-split manifest of coll_name."""
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read byte_count bytes from the start of the stream."""
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Each stream line of the manifest must round-trip through a
        # StreamReader that was created without a Keep client.
        manifest = self.manifest_for('multilevel_collection_1')
        stream_lines = manifest.rstrip('\n').split('\n')
        for stream_line in stream_lines:
            rebuilt = StreamReader(stream_line.split()).manifest_text()
            self.assertEqual(stream_line + '\n', rebuilt)
246
247
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    """Run the shared retry tests against read() on a stream's first file."""

    def reader_for(self, coll_name, **kwargs):
        """Return the first file of a StreamReader built from coll_name's manifest."""
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        """Read up to byte_count bytes with the file's read() method."""
        data = reader.read(byte_count, **kwargs)
        return data
255
256
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    """Repeat the file retry tests using readfrom() instead of read()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # Always read from the beginning of the file.
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
260
261
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    """Repeat the file retry tests using readall()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is unused: readall() yields the whole file in pieces,
        # which are joined into a single string here.
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
265
266
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    """Repeat the file retry tests using readall_decompressed()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is unused: the whole file is read and its pieces
        # are joined into a single string.
        pieces = reader.readall_decompressed(**kwargs)
        return ''.join(pieces)
270
271
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    """Repeat the file retry tests using readlines()."""

    def read_for_test(self, reader, byte_count, **kwargs):
        # byte_count is unused: all lines are read and joined back together.
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
275
class StreamWriterTestCase(unittest.TestCase):
    """Tests for reading from and appending to a StreamWriter."""

    class MockKeep(object):
        """Dict-backed stand-in for a Keep client (get/put only)."""

        def __init__(self, blocks):
            # blocks maps locator strings to block contents.
            self.blocks = blocks

        def get(self, locator, num_retries=0):
            """Return the stored block for locator; num_retries is ignored."""
            return self.blocks[locator]

        def put(self, data):
            """Store data under '<md5 hex digest>+<length>' and return that locator."""
            digest = hashlib.md5(data).hexdigest()
            locator = "%s+%i" % (digest, len(data))
            self.blocks[locator] = str(data)
            return locator

    def test_init(self):
        # A stream built from manifest tokens serves data from the mock block.
        keep = StreamWriterTestCase.MockKeep(
            {"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
        tokens = ['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt']
        stream = StreamWriter(tokens, keep=keep)
        self.assertEqual("01234", stream.readfrom(0, 5))

    def test_append(self):
        # Appended data becomes readable directly after the existing block.
        keep = StreamWriterTestCase.MockKeep(
            {"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
        tokens = ['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt']
        stream = StreamWriter(tokens, keep=keep)
        before = stream.readfrom(5, 8)
        self.assertEqual("56789", before)
        stream.append("foo")
        after = stream.readfrom(5, 8)
        self.assertEqual("56789foo", after)
298
299
class StreamFileWriterTestCase(unittest.TestCase):
    """Tests for truncating, appending to and overwriting a stream's file writer."""

    def _digits_stream(self, *file_tokens):
        """Return a StreamWriter over one mock block containing '0123456789'.

        file_tokens are the manifest file tokens that follow the block locator.
        """
        locator = '781e5e245d69b566979b86e28d23f2c7+10'
        keep = StreamWriterTestCase.MockKeep({locator: "0123456789"})
        return StreamWriter(['.', locator] + list(file_tokens), keep=keep)

    def _empty_stream(self):
        """Return a StreamWriter holding an empty 'count.txt' and an empty mock Keep."""
        keep = StreamWriterTestCase.MockKeep({})
        tokens = ['.', arvados.config.EMPTY_BLOCK_LOCATOR, '0:0:count.txt']
        return StreamWriter(tokens, keep=keep)

    def test_truncate(self):
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("56789", count_file.readfrom(5, 8))
        count_file.truncate(8)
        self.assertEqual("567", count_file.readfrom(5, 8))

    def test_append(self):
        # Writing at the end of the file grows it.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("56789", count_file.readfrom(5, 8))
        count_file.seek(10)
        count_file.write("foo")
        self.assertEqual(count_file.size(), 13)
        self.assertEqual("56789foo", count_file.readfrom(5, 8))

    def test_write0(self):
        # Overwrite the first three bytes of the file.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("0123456789", count_file.readfrom(0, 13))
        count_file.seek(0)
        count_file.write("foo")
        self.assertEqual(count_file.size(), 10)
        self.assertEqual("foo3456789", count_file.readfrom(0, 13))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 10:3:count.txt 3:7:count.txt\n", stream.manifest_text())

    def test_write1(self):
        # Overwrite three bytes in the middle of the file.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("0123456789", count_file.readfrom(0, 13))
        count_file.seek(3)
        count_file.write("foo")
        self.assertEqual(count_file.size(), 10)
        self.assertEqual("012foo6789", count_file.readfrom(0, 13))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", stream.manifest_text())

    def test_write2(self):
        # Overwrite the last three bytes of the file.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("0123456789", count_file.readfrom(0, 13))
        count_file.seek(7)
        count_file.write("foo")
        self.assertEqual(count_file.size(), 10)
        self.assertEqual("0123456foo", count_file.readfrom(0, 13))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:3:count.txt\n", stream.manifest_text())

    def test_write3(self):
        # The write straddles the boundary between the file's two segments.
        stream = self._digits_stream('0:10:count.txt', '0:10:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("012345678901234", count_file.readfrom(0, 15))
        count_file.seek(7)
        count_file.write("foobar")
        self.assertEqual(count_file.size(), 20)
        self.assertEqual("0123456foobar34", count_file.readfrom(0, 15))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", stream.manifest_text())

    def test_write4(self):
        # The write starts in the first segment, covers the second and
        # ends inside the third.
        stream = self._digits_stream('0:4:count.txt', '0:4:count.txt',
                                     '0:4:count.txt')
        count_file = stream.files()["count.txt"]
        self.assertEqual("012301230123", count_file.readfrom(0, 15))
        count_file.seek(2)
        count_file.write("abcdefg")
        self.assertEqual(count_file.size(), 12)
        self.assertEqual("01abcdefg123", count_file.readfrom(0, 15))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", stream.manifest_text())

    def test_write_large(self):
        # 100000 writes of 1000 bytes each; the committed manifest is
        # expected to list two blocks.
        stream = self._empty_stream()
        count_file = stream.files()["count.txt"]
        chunk = "0123456789" * 100
        for _ in xrange(100000):
            count_file.write(chunk)
        self.assertEqual(count_file.size(), 100000000)
        stream.commit()
        self.assertEqual(". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n", stream.manifest_text())

    def test_write_rewrite0(self):
        # Rewriting the same range ten times must commit a single block.
        stream = self._empty_stream()
        count_file = stream.files()["count.txt"]
        for _ in xrange(10):
            count_file.seek(0, os.SEEK_SET)
            count_file.write("0123456789")
        stream.commit()
        self.assertEqual(count_file.size(), 10)
        self.assertEqual("0123456789", count_file.readfrom(0, 20))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", stream.manifest_text())

    def test_write_rewrite1(self):
        # Repeatedly rewrite the ten bytes just past the original content.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        for _ in xrange(10):
            count_file.seek(10, os.SEEK_SET)
            count_file.write("abcdefghij")
        stream.commit()
        self.assertEqual(count_file.size(), 20)
        self.assertEqual("0123456789abcdefghij", count_file.readfrom(0, 20))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:10:count.txt 10:10:count.txt\n", stream.manifest_text())

    def test_write_rewrite2(self):
        # Repeatedly rewrite a range that overlaps the tail of the
        # original content and extends the file.
        stream = self._digits_stream('0:10:count.txt')
        count_file = stream.files()["count.txt"]
        for _ in xrange(10):
            count_file.seek(5, os.SEEK_SET)
            count_file.write("abcdefghij")
        stream.commit()
        self.assertEqual(count_file.size(), 15)
        self.assertEqual("01234abcdefghij", count_file.readfrom(0, 20))
        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", stream.manifest_text())
420
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()