X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ba917d72d48615cdd0c6da87d41b6bd0f9f26666..950e76c1dc342ffe79080c9ef911e841675b7b0b:/sdk/python/tests/test_arvfile.py diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py index a800913055..20e40527fd 100644 --- a/sdk/python/tests/test_arvfile.py +++ b/sdk/python/tests/test_arvfile.py @@ -1,41 +1,41 @@ -from __future__ import absolute_import -from builtins import hex -from builtins import str -from builtins import range -from builtins import object -import bz2 +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + import datetime -import gzip -import io -import mock import os -import unittest import time +import unittest + +from unittest import mock import arvados from arvados._ranges import Range from arvados.keep import KeepLocator -from arvados.collection import Collection, CollectionReader +from arvados.collection import Collection from arvados.arvfile import ArvadosFile, ArvadosFileReader from . import arvados_testutil as tutil -from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin +from .test_stream import StreamFileReaderTestMixin, StreamRetryTestMixin class ArvadosFileWriterTestCase(unittest.TestCase): class MockKeep(object): def __init__(self, blocks): self.blocks = blocks self.requests = [] - def get(self, locator, num_retries=0): + self.num_prefetch_threads = 1 + def get(self, locator, num_retries=0, prefetch=False): self.requests.append(locator) return self.blocks.get(locator) def get_from_cache(self, locator): self.requests.append(locator) return self.blocks.get(locator) - def put(self, data, num_retries=None, copies=None): + def put(self, data, num_retries=None, copies=None, classes=[]): pdh = tutil.str_keep_locator(data) self.blocks[pdh] = bytes(data) return pdh + def block_prefetch(self, loc): + self.requests.append(loc) class MockApi(object): def __init__(self, b, r): @@ -169,8 +169,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) - - def test_write_to_end(self): keep = ArvadosFileWriterTestCase.MockKeep({ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", @@ -213,26 +211,41 @@ class ArvadosFileWriterTestCase(unittest.TestCase): keep = ArvadosFileWriterTestCase.MockKeep({ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", }) - c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep) - writer = c.open("count.txt", "ab+") - self.assertEqual(writer.read(20), b"0123456789") - - writer.seek(0, os.SEEK_SET) - writer.write("hello") - self.assertEqual(writer.read(), b"") - writer.seek(-5, os.SEEK_CUR) - self.assertEqual(writer.read(3), b"hel") - self.assertEqual(writer.read(), b"lo") - writer.seek(0, os.SEEK_SET) - self.assertEqual(writer.read(), b"0123456789hello") - - writer.seek(0) - writer.write("world") - self.assertEqual(writer.read(), b"") - writer.seek(0) - self.assertEqual(writer.read(), b"0123456789helloworld") - - self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text()) + for (mode, convert) in ( + ('a+', lambda data: data.decode(encoding='utf-8')), + ('at+', lambda data: data.decode(encoding='utf-8')), + ('ab+', lambda data: data)): + c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep) + writer = c.open("count.txt", mode) + self.assertEqual(writer.read(20), convert(b"0123456789")) + + writer.seek(0, os.SEEK_SET) + writer.write(convert(b"hello")) + self.assertEqual(writer.read(), convert(b"")) + if 'b' in mode: + writer.seek(-5, os.SEEK_CUR) + self.assertEqual(writer.read(3), convert(b"hel")) + self.assertEqual(writer.read(), convert(b"lo")) + else: + with self.assertRaises(IOError): + writer.seek(-5, os.SEEK_CUR) + with self.assertRaises(IOError): + writer.seek(-3, os.SEEK_END) + writer.seek(0, os.SEEK_SET) + writer.read(7) + self.assertEqual(7, writer.tell()) + self.assertEqual(7, writer.seek(7, os.SEEK_SET)) + + writer.seek(0, os.SEEK_SET) + self.assertEqual(writer.read(), convert(b"0123456789hello")) + + writer.seek(0) + writer.write(convert(b"world")) + self.assertEqual(writer.read(), convert(b"")) + writer.seek(0) + self.assertEqual(writer.read(), convert(b"0123456789helloworld")) + + self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text()) def test_write_at_beginning(self): keep = ArvadosFileWriterTestCase.MockKeep({ @@ -347,7 +360,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) - def test_large_write(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) @@ -381,7 +393,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n") - def test_sparse_write2(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) @@ -398,12 +409,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n") - def test_sparse_write3(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]: - with Collection() as c: + with Collection(api_client=api, keep_client=keep) as c: writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 0) @@ -418,7 +428,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]: - with Collection() as c: + with Collection(api_client=api, keep_client=keep) as c: writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 0) @@ -429,7 +439,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): writer.seek(0) self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444") - def test_rewrite_on_empty_file(self): keep = ArvadosFileWriterTestCase.MockKeep({}) with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', @@ -611,12 +620,14 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8")) -class ArvadosFileReaderTestCase(StreamFileReaderTestCase): +class ArvadosFileReaderTestCase(unittest.TestCase, StreamFileReaderTestMixin): class MockParent(object): class MockBlockMgr(object): def __init__(self, blocks, nocache): self.blocks = blocks self.nocache = nocache + self._keep = ArvadosFileWriterTestCase.MockKeep({}) + self.prefetch_lookahead = 0 def block_prefetch(self, loc): pass @@ -638,6 +649,11 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): return ArvadosFileReaderTestCase.MockParent.MockBlockMgr(self.blocks, self.nocache) + def make_file_reader(self, name='emptyfile', data='', nocache=False): + loc = tutil.str_keep_locator(data) + af = ArvadosFile(ArvadosFileReaderTestCase.MockParent({loc: data}, nocache=nocache), name, stream=[Range(loc, 0, len(data))], segments=[Range(0, len(data), len(data))]) + return ArvadosFileReader(af, mode='rb') + def make_count_reader(self, nocache=False): stream = [] n = 0 @@ -647,7 +663,21 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): blocks[loc] = d stream.append(Range(loc, n, len(d))) n += len(d) - af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), "count.txt", stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]) + af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache=nocache), "count.txt", stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)]) + return ArvadosFileReader(af, mode="rb") + + def make_newlines_reader(self, nocache=False): + stream = [] + segments = [] + n = 0 + blocks = {} + for d in [b'one\ntwo\n\nth', b'ree\nfour\n\n']: + loc = tutil.str_keep_locator(d) + blocks[loc] = d + stream.append(Range(loc, n, len(d))) + segments.append(Range(n, len(d), n+len(d))) + n += len(d) + af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache=nocache), "count.txt", stream=stream, segments=segments) return ArvadosFileReader(af, mode="rb") def test_read_block_crossing_behavior(self): @@ -656,16 +686,7 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): sfile = self.make_count_reader(nocache=True) self.assertEqual(b'12345678', sfile.read(8)) - def test_successive_reads(self): - # Override StreamFileReaderTestCase.test_successive_reads - sfile = self.make_count_reader(nocache=True) - self.assertEqual(b'1234', sfile.read(4)) - self.assertEqual(b'5678', sfile.read(4)) - self.assertEqual(b'9', sfile.read(4)) - self.assertEqual(b'', sfile.read(4)) - def test_tell_after_block_read(self): - # Override StreamFileReaderTestCase.test_tell_after_block_read sfile = self.make_count_reader(nocache=True) self.assertEqual(b'12345678', sfile.read(8)) self.assertEqual(8, sfile.tell()) @@ -678,8 +699,60 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: r = c.open("count.txt", "rb") self.assertEqual(b"0123", r.read(4)) - self.assertIn("2e9ec317e197819358fbc43afca7d837+8", keep.requests) - self.assertIn("e8dc4081b13434b45189a720b77b6818+8", keep.requests) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8"], keep.requests) + + def test_prefetch_disabled(self): + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + keep.num_prefetch_threads = 0 + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + self.assertEqual(b"0123", r.read(4)) + + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8"], keep.requests) + + def test_prefetch_first_read_only(self): + # test behavior that prefetch only happens every 128 reads + # check that it doesn't make a prefetch request on the second read + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + self.assertEqual(b"0123", r.read(4)) + self.assertEqual(b"45", r.read(2)) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8", + "2e9ec317e197819358fbc43afca7d837+8"], keep.requests) + self.assertEqual(3, len(keep.requests)) + + def test_prefetch_again(self): + # test behavior that prefetch only happens every 128 reads + # check that it does make another prefetch request after 128 reads + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + for i in range(0, 129): + r.seek(0) + self.assertEqual(b"0123", r.read(4)) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8", + "2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8"], keep.requests[0:4]) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8"], keep.requests[127:131]) + # gets the 1st block 129 times from keep (cache), + # and the 2nd block twice to get 131 requests + self.assertEqual(131, len(keep.requests)) def test__eq__from_manifest(self): with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1: @@ -832,7 +905,7 @@ class BlockManagerTest(unittest.TestCase): mockkeep = mock.MagicMock() with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() - bufferblock.owner = mock.MagicMock() + bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile) def flush(sync=None): blockmanager.commit_bufferblock(bufferblock, sync) bufferblock.owner.flush.side_effect = flush @@ -846,7 +919,7 @@ class BlockManagerTest(unittest.TestCase): def test_bufferblock_commit_pending(self): # Test for bug #7225 mockkeep = mock.MagicMock() - mockkeep.put.side_effect = lambda x: time.sleep(1) + mockkeep.put.side_effect = lambda *args, **kwargs: time.sleep(1) with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() bufferblock.append("foo") @@ -857,13 +930,12 @@ class BlockManagerTest(unittest.TestCase): blockmanager.commit_bufferblock(bufferblock, True) self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED) - def test_bufferblock_commit_with_error(self): mockkeep = mock.MagicMock() mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail") with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() - bufferblock.owner = mock.MagicMock() + bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile) def flush(sync=None): blockmanager.commit_bufferblock(bufferblock, sync) bufferblock.owner.flush.side_effect = flush