12 from arvados import StreamReader, StreamFileReader, StreamWriter, StreamFileWriter
14 import arvados_testutil as tutil
15 import run_test_server
17 class StreamFileReaderTestCase(unittest.TestCase):
# Fixture helper: wraps a mock stream made of three 5-byte data blocks in a
# StreamFileReader. The tests that use it expect the file to read '123456789'.
18 def make_count_reader(self):
19 stream = tutil.MockStreamReader('.', '01234', '34567', '67890')
# NOTE(review): each triple presumably means [stream offset, length, file
# offset] - confirm against StreamFileReader.
20 return StreamFileReader(stream, [[1, 3, 0], [6, 3, 3], [11, 3, 6]],
def test_read_returns_first_block(self):
    # Asking for 10 bytes still yields only the first 3-byte block, because
    # read() calls are aligned on block boundaries - see #3663.
    reader = self.make_count_reader()
    first_chunk = reader.read(10)
    self.assertEqual('123', first_chunk)
def test_small_read(self):
    # A request shorter than the first block returns exactly that many bytes.
    reader = self.make_count_reader()
    got = reader.read(2)
    self.assertEqual('12', got)
def test_successive_reads(self):
    # Each read(10) hands back one block; the fourth read is at EOF and
    # therefore returns the empty string.
    reader = self.make_count_reader()
    expected_chunks = ('123', '456', '789', '')
    for chunk in expected_chunks:
        got = reader.read(10)
        self.assertEqual(chunk, got)
def test_readfrom_spans_blocks(self):
    # readfrom(5, 12) asks for more than is left after offset 5; it returns
    # the remaining four bytes.
    reader = self.make_count_reader()
    tail = reader.readfrom(5, 12)
    self.assertEqual('6789', tail)
def test_small_readfrom_spanning_blocks(self):
    # Four bytes starting at offset 1 come back as one contiguous string.
    reader = self.make_count_reader()
    middle = reader.readfrom(1, 4)
    self.assertEqual('2345', middle)
def test_readall(self):
    # readall() produces the file in pieces; joined together they must
    # equal the complete file content.
    reader = self.make_count_reader()
    pieces = reader.readall()
    self.assertEqual('123456789', ''.join(pieces))
def test_one_arg_seek(self):
    # Re-runs the relative-seek scenario with an empty extra-argument list,
    # presumably so that seek() is called with the offset only.
    # NOTE(review): Python's own file objects treat a one-argument seek()
    # as absolute (os.SEEK_SET); confirm that StreamFileReader.seek really
    # defaults to relative before relying on this delegation.
    no_extra_args = []
    self.test_relative_seek(no_extra_args)
# Expects two successive 2-byte reads from a count reader to return '78' and
# then '56'.
# NOTE(review): `args` is presumably forwarded to seek() as its extra
# argument(s) - confirm. The default is a shared list object; nothing visible
# mutates it, but a tuple default would be safer.
52 def test_absolute_seek(self, args=[os.SEEK_SET]):
53 sfile = self.make_count_reader()
55 self.assertEqual('78', sfile.read(2))
57 self.assertEqual('56', sfile.read(2))
# Reads the first two bytes ('12') and later expects a 2-byte read to return
# '56'. test_one_arg_seek also calls this method with an empty list for `args`.
# NOTE(review): `args` is presumably forwarded to seek() as its extra
# argument(s) - confirm. The default is a shared list object; nothing visible
# mutates it, but a tuple default would be safer.
59 def test_relative_seek(self, args=[os.SEEK_CUR]):
60 sfile = self.make_count_reader()
61 self.assertEqual('12', sfile.read(2))
63 self.assertEqual('56', sfile.read(2))
def test_end_seek(self):
    # Six bytes back from the end of the 9-byte file is offset 3, so the
    # next two bytes are '45'.
    reader = self.make_count_reader()
    reader.seek(-6, os.SEEK_END)
    got = reader.read(2)
    self.assertEqual('45', got)
def test_seek_min_zero(self):
    # Seeking to a negative absolute offset must leave the position at 0.
    reader = self.make_count_reader()
    reader.seek(-2, os.SEEK_SET)
    position = reader.tell()
    self.assertEqual(0, position)
def test_seek_max_size(self):
    # Seeking past the end must leave the position at the file size (9).
    reader = self.make_count_reader()
    reader.seek(2, os.SEEK_END)
    position = reader.tell()
    self.assertEqual(9, position)
81 self.assertEqual(9, self.make_count_reader().size())
# Expects tell() to report offset 3 - going by the test name, after a read
# that consumed the whole first 3-byte block (confirm).
83 def test_tell_after_block_read(self):
84 sfile = self.make_count_reader()
86 self.assertEqual(3, sfile.tell())
# Expects tell() to report offset 1 - going by the test name, after a read
# of a single byte (confirm).
88 def test_tell_after_small_read(self):
89 sfile = self.make_count_reader()
91 self.assertEqual(1, sfile.tell())
# Expects read(2) to raise ValueError - going by the test name, because the
# reader has been closed first (confirm).
93 def test_no_read_after_close(self):
94 sfile = self.make_count_reader()
96 self.assertRaises(ValueError, sfile.read, 2)
def test_context(self):
    # The reader works as a context manager: it is open and readable inside
    # the with-block and reports closed once the block has exited.
    with self.make_count_reader() as reader:
        self.assertFalse(reader.closed, "reader is closed inside context")
        got = reader.read(2)
        self.assertEqual('12', got)
    self.assertTrue(reader.closed, "reader is open after context")
def make_newlines_reader(self):
    # Fixture: a 21-byte file of short lines, stored as two blocks so that
    # the line 'three\n' straddles the block boundary.
    blocks = ('one\ntwo\n\nth', 'ree\nfour\n\n')
    stream = tutil.MockStreamReader('.', *blocks)
    segments = [[0, 11, 0], [11, 10, 11]]
    return StreamFileReader(stream, segments, 'count.txt')
# Shared assertion helper for the line-oriented tests: the expected value is
# the six lines contained in the data built by make_newlines_reader.
# NOTE(review): `actual` is presumably the second assertEqual argument - confirm.
108 def check_lines(self, actual):
109 self.assertEqual(['one\n', 'two\n', '\n', 'three\n', 'four\n', '\n'],
# Reads from the newlines fixture with readline() and validates the result
# through check_lines.
# NOTE(review): `actual` is presumably the list of lines accumulated from
# successive readline() calls - confirm.
112 def test_readline(self):
113 reader = self.make_newlines_reader()
116 data = reader.readline()
120 self.check_lines(actual)
def test_readlines(self):
    # readlines() with no size hint returns every line of the fixture.
    reader = self.make_newlines_reader()
    all_lines = reader.readlines()
    self.check_lines(all_lines)
def test_iteration(self):
    # Iterating over the reader yields the same lines as readlines().
    reader = self.make_newlines_reader()
    iterated = list(iter(reader))
    self.check_lines(iterated)
def test_readline_size(self):
    # readline(size) stops at the size limit or at the newline, whichever
    # comes first. The calls are made in order against one reader.
    reader = self.make_newlines_reader()
    steps = [
        (2, 'on'),
        (4, 'e\n'),
        (6, 'two\n'),
        (8, '\n'),
        (4, 'thre'),
    ]
    for size, expected in steps:
        self.assertEqual(expected, reader.readline(size))
def test_readlines_sizehint(self):
    # With a size hint of 8 the result starts with the first two lines and
    # must not reach as far as 'three\n'.
    reader = self.make_newlines_reader()
    lines = reader.readlines(8)
    self.assertEqual(['one\n', 'two\n'], lines[:2])
    self.assertNotIn('three\n', lines)
def test_name_attribute(self):
    # The file name must be available both as the attribute .name and,
    # for backward compatibility, by calling .name().
    empty_stream = tutil.MockStreamReader()
    reader = StreamFileReader(empty_stream, [[0, 0, 0]], 'nametest')
    self.assertEqual('nametest', reader.name)
    self.assertEqual('nametest', reader.name())
def check_decompression(self, compress_ext, compress_func):
    # Shared helper: run a known text through compress_func, expose the
    # result as a file named 'test.<compress_ext>', and check that
    # readall_decompressed() gives the original text back.
    plain = 'decompression\ntest\n'
    payload = compress_func(plain)
    stream = tutil.MockStreamReader('.', payload)
    segments = [[0, len(payload), 0]]
    reader = StreamFileReader(stream, segments, 'test.' + compress_ext)
    decompressed = ''.join(reader.readall_decompressed())
    self.assertEqual(plain, decompressed)
def gzip_compress(data):
    # Gzip `data` entirely in memory and return the compressed bytes. The
    # GzipFile must be closed (end of the with-block) before the buffer
    # holds the complete archive.
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
        gz.write(data)
    return buf.getvalue()
def test_no_decompression(self):
    # Identity "compression" under a .log name: the text must come back
    # from readall_decompressed() unchanged.
    identity = lambda s: s
    self.check_decompression('log', identity)
def test_gzip_decompression(self):
    # Gzip-compressed data under a .gz name must decompress to the text.
    compressor = self.gzip_compress
    self.check_decompression('gz', compressor)
def test_bz2_decompression(self):
    # Bzip2-compressed data under a .bz2 name must decompress to the text.
    compressor = bz2.compress
    self.check_decompression('bz2', compressor)
173 class StreamRetryTestMixin(object):
174 # Define reader_for(coll_name, **kwargs)
175 # and read_for_test(reader, size, **kwargs).
176 API_COLLECTIONS = run_test_server.fixture('collections')
# Builds an arvados.KeepClient whose proxy URL points at port 1 of
# tutil.TEST_HOST (the host is wrapped in square brackets in the URL).
178 def keep_client(self):
179 return arvados.KeepClient(proxy='http://[%s]:1' % (tutil.TEST_HOST,),
def manifest_for(self, coll_name):
    # Look up the named collection fixture and return its manifest text.
    collection = self.API_COLLECTIONS[coll_name]
    return collection['manifest_text']
def test_success_without_retries(self):
    # A single 200 response is enough to read the 3-byte file.
    reader = self.reader_for('bar_file')
    with tutil.mock_get_responses('bar', 200):
        data = self.read_for_test(reader, 3)
        self.assertEqual('bar', data)
def test_read_no_default_retry(self):
    # With no num_retries given anywhere, one 500 response must surface
    # as KeepReadError.
    reader = self.reader_for('user_agreement')
    with tutil.mock_get_responses('', 500), \
            self.assertRaises(arvados.errors.KeepReadError):
        self.read_for_test(reader, 10)
def test_read_with_instance_retries(self):
    # num_retries set on the reader lets the read recover from one 500.
    reader = self.reader_for('foo_file', num_retries=3)
    with tutil.mock_get_responses('foo', 500, 200):
        data = self.read_for_test(reader, 3)
        self.assertEqual('foo', data)
def test_read_with_method_retries(self):
    # num_retries passed to the read call itself lets the read recover
    # from one 500.
    reader = self.reader_for('foo_file')
    with tutil.mock_get_responses('foo', 500, 200):
        data = self.read_for_test(reader, 3, num_retries=3)
        self.assertEqual('foo', data)
def test_read_instance_retries_exhausted(self):
    # Four 500 responses in a row outlast a reader configured with
    # num_retries=3; the 200 queued after them must not rescue the read.
    reader = self.reader_for('bar_file', num_retries=3)
    with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
            self.assertRaises(arvados.errors.KeepReadError):
        self.read_for_test(reader, 3)
def test_read_method_retries_exhausted(self):
    # Four 500 responses in a row outlast a read call made with
    # num_retries=3; the 200 queued after them must not rescue the read.
    reader = self.reader_for('bar_file')
    with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200), \
            self.assertRaises(arvados.errors.KeepReadError):
        self.read_for_test(reader, 3, num_retries=3)
def test_method_retries_take_precedence(self):
    # The reader allows 10 retries, which would be enough to reach the
    # queued 200. The per-call num_retries=1 must win, so the read fails.
    reader = self.reader_for('user_agreement', num_retries=10)
    with tutil.mock_get_responses('', 500, 500, 500, 200), \
            self.assertRaises(arvados.errors.KeepReadError):
        self.read_for_test(reader, 10, num_retries=1)
class StreamReaderTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Runs the StreamRetryTestMixin scenarios against StreamReader.readfrom().

    def reader_for(self, coll_name, **kwargs):
        # Build a StreamReader from the tokens of the fixture's manifest.
        tokens = self.manifest_for(coll_name).split()
        return StreamReader(tokens, self.keep_client(), **kwargs)

    def read_for_test(self, reader, byte_count, **kwargs):
        # Always read from the start of the stream.
        return reader.readfrom(0, byte_count, **kwargs)

    def test_manifest_text_without_keep_client(self):
        # Every stream line of the fixture manifest must round-trip through
        # a StreamReader that was constructed without a Keep client.
        mtext = self.manifest_for('multilevel_collection_1')
        stream_lines = mtext.rstrip('\n').split('\n')
        for line in stream_lines:
            reader = StreamReader(line.split())
            self.assertEqual(line + '\n', reader.manifest_text())
class StreamFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
    # Runs the StreamRetryTestMixin scenarios against read() on the first
    # file of the stream.

    def reader_for(self, coll_name, **kwargs):
        # Build the stream from the fixture manifest, then hand back its
        # first file reader.
        tokens = self.manifest_for(coll_name).split()
        stream = StreamReader(tokens, self.keep_client(), **kwargs)
        return stream.all_files()[0]

    def read_for_test(self, reader, byte_count, **kwargs):
        return reader.read(byte_count, **kwargs)
class StreamFileReadFromTestCase(StreamFileReadTestCase):
    # Same scenarios, exercised through readfrom() starting at offset 0.

    def read_for_test(self, reader, byte_count, **kwargs):
        start = 0
        return reader.readfrom(start, byte_count, **kwargs)
class StreamFileReadAllTestCase(StreamFileReadTestCase):
    # Same scenarios, exercised through readall(); byte_count is ignored
    # because readall() always reads the whole file.

    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall(**kwargs)
        return ''.join(pieces)
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
    # Same scenarios, exercised through readall_decompressed(); byte_count
    # is ignored because the whole file is read.

    def read_for_test(self, reader, byte_count, **kwargs):
        pieces = reader.readall_decompressed(**kwargs)
        return ''.join(pieces)
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
    # Same scenarios, exercised through readlines(); byte_count is ignored
    # and the returned lines are joined back into one string.

    def read_for_test(self, reader, byte_count, **kwargs):
        lines = reader.readlines(**kwargs)
        return ''.join(lines)
# Tests for StreamWriter, using an in-memory stand-in for the Keep client.
276 class StreamWriterTestCase(unittest.TestCase):
# Minimal Keep stand-in: block data is looked up in self.blocks by locator.
277 class MockKeep(object):
278 def __init__(self, blocks):
# num_retries is accepted for interface compatibility but not used here.
280 def get(self, locator, num_retries=0):
281 return self.blocks[locator]
# Stores `data` under a locator of the form "<md5 hex digest>+<length>".
283 pdh = "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))
284 self.blocks[pdh] = data
# One 10-byte block exposed as count.txt: readfrom(0, 5) must return the
# first five bytes.
288 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
289 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
290 self.assertEqual("01234", stream.readfrom(0, 5))
# Same fixture. readfrom(5, 8) first returns only the five bytes that exist
# past offset 5; the last assertion expects all eight bytes, ending in "foo"
# (going by the test name, after three bytes have been appended - confirm).
292 def test_append(self):
293 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
294 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
295 self.assertEqual("56789", stream.readfrom(5, 8))
297 self.assertEqual("56789foo", stream.readfrom(5, 8))
300 class StreamFileWriterTestCase(unittest.TestCase):
# Fixture: one 10-byte block exposed as count.txt. readfrom(5, 8) first
# returns the five bytes that exist past offset 5; the final assertion
# expects only "567" from the same call (going by the test name, after the
# file has been truncated to 8 bytes - confirm).
301 def test_truncate(self):
302 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
303 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
304 writer = stream.files()["count.txt"]
305 self.assertEqual("56789", writer.readfrom(5, 8))
307 self.assertEqual("567", writer.readfrom(5, 8))
# Fixture: one 10-byte block exposed as count.txt. The final assertions
# expect the file to have grown to 13 bytes and readfrom(5, 8) to return
# "56789foo" (going by the test name, after "foo" has been appended at the
# end - confirm).
309 def test_append(self):
310 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
311 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
312 writer = stream.files()["count.txt"]
313 self.assertEqual("56789", writer.readfrom(5, 8))
316 self.assertEqual(writer.size(), 13)
317 self.assertEqual("56789foo", writer.readfrom(5, 8))
# Overwrite at the start of the file. readfrom(0, 13) returns only the ten
# bytes that exist. Afterwards the size must still be 10, the content must
# be "foo3456789", and the manifest must list a 3-byte segment taken from
# bufferblock0 (stream offset 10) followed by bytes 3-9 of the original block.
# NOTE(review): the write itself is presumably "foo" at offset 0 - confirm.
319 def test_write0(self):
320 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
321 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
322 writer = stream.files()["count.txt"]
323 self.assertEqual("0123456789", writer.readfrom(0, 13))
326 self.assertEqual(writer.size(), 10)
327 self.assertEqual("foo3456789", writer.readfrom(0, 13))
328 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 10:3:count.txt 3:7:count.txt\n", stream.manifest_text())
# Overwrite in the middle of the file. Afterwards the size must still be 10,
# the content must be "012foo6789", and the manifest must list three
# segments: original bytes 0-2, three bytes from bufferblock0 (stream offset
# 10), then original bytes 6-9.
# NOTE(review): the write itself is presumably "foo" at offset 3 - confirm.
330 def test_write1(self):
331 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
332 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
333 writer = stream.files()["count.txt"]
334 self.assertEqual("0123456789", writer.readfrom(0, 13))
337 self.assertEqual(writer.size(), 10)
338 self.assertEqual("012foo6789", writer.readfrom(0, 13))
339 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", stream.manifest_text())
# Overwrite of the last three bytes. Afterwards the size must still be 10,
# the content must be "0123456foo", and the manifest must list original
# bytes 0-6 followed by three bytes from bufferblock0 (stream offset 10).
# NOTE(review): the write itself is presumably "foo" at offset 7 - confirm.
341 def test_write2(self):
342 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt'],
343 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
344 writer = stream.files()["count.txt"]
345 self.assertEqual("0123456789", writer.readfrom(0, 13))
348 self.assertEqual(writer.size(), 10)
349 self.assertEqual("0123456foo", writer.readfrom(0, 13))
350 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:3:count.txt\n", stream.manifest_text())
# count.txt maps the same 10-byte block twice, giving a 20-byte file.
# Writing "foobar" must keep the size at 20 and replace six bytes across the
# boundary between the two segments: the expected content puts the new data
# at offsets 7-12, and the manifest keeps original bytes 0-6, six bytes from
# bufferblock0, then bytes 3-9 of the block.
# NOTE(review): the write position is presumably set to 7 before the write -
# confirm.
352 def test_write3(self):
353 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:10:count.txt', '0:10:count.txt'],
354 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
355 writer = stream.files()["count.txt"]
356 self.assertEqual("012345678901234", writer.readfrom(0, 15))
358 writer.write("foobar")
359 self.assertEqual(writer.size(), 20)
360 self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
361 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", stream.manifest_text())
# count.txt maps bytes 0-3 of the block three times, giving a 12-byte file.
# Writing "abcdefg" must keep the size at 12 and overwrite part of the first
# segment, all of the second and the first byte of the third: the expected
# content puts the new data at offsets 2-8, and the manifest keeps original
# bytes 0-1, seven bytes from bufferblock0, then bytes 1-3 of the block.
# NOTE(review): the write position is presumably set to 2 before the write -
# confirm.
363 def test_write4(self):
364 stream = StreamWriter(['.', '781e5e245d69b566979b86e28d23f2c7+10', '0:4:count.txt', '0:4:count.txt', '0:4:count.txt'],
365 keep=StreamWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"}))
366 writer = stream.files()["count.txt"]
367 self.assertEqual("012301230123", writer.readfrom(0, 15))
369 writer.write("abcdefg")
370 self.assertEqual(writer.size(), 12)
371 self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
372 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 bufferblock0 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", stream.manifest_text())
# Starts from an empty count.txt backed by an empty MockKeep. `text` is a
# 1000-character string and the loop runs 100000 times, which matches the
# expected final size of 100000000 bytes. The expected manifest splits that
# data into two blocks of 67108000 and 32892000 bytes.
# NOTE(review): each loop iteration presumably writes `text` once - confirm.
# xrange makes this Python 2 code.
374 def test_write_large(self):
375 stream = StreamWriter(['.', arvados.config.EMPTY_BLOCK_LOCATOR, '0:0:count.txt'],
376 keep=StreamWriterTestCase.MockKeep({}))
377 writer = stream.files()["count.txt"]
378 text = ''.join(["0123456789" for a in xrange(0, 100)])
379 for b in xrange(0, 100000):
381 self.assertEqual(writer.size(), 100000000)
383 self.assertEqual(". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n", stream.manifest_text())
# Starts from an empty count.txt and rewrites the same ten bytes at offset 0
# ten times. The file must end up 10 bytes long and the manifest must
# reference a single 10-byte block, with no leftover data from the earlier
# writes.
385 def test_write_rewrite(self):
386 stream = StreamWriter(['.', arvados.config.EMPTY_BLOCK_LOCATOR, '0:0:count.txt'],
387 keep=StreamWriterTestCase.MockKeep({}))
388 writer = stream.files()["count.txt"]
389 for b in xrange(0, 10):
390 writer.seek(0, os.SEEK_SET)
391 writer.write("0123456789")
393 self.assertEqual(writer.size(), 10)
394 self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", stream.manifest_text())
396 if __name__ == '__main__':