X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6f779a6bb29c2139f1b6f9eda7b472117fb8e4b8..HEAD:/sdk/python/tests/test_arvfile.py diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py index 086fa542a2..6bcba9a81d 100644 --- a/sdk/python/tests/test_arvfile.py +++ b/sdk/python/tests/test_arvfile.py @@ -2,24 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 -from __future__ import absolute_import -from builtins import hex -from builtins import str -from builtins import range -from builtins import object -import bz2 import datetime -import gzip -import io -import mock import os -import unittest import time +import unittest + +from unittest import mock import arvados from arvados._ranges import Range from arvados.keep import KeepLocator -from arvados.collection import Collection, CollectionReader +from arvados.collection import Collection from arvados.arvfile import ArvadosFile, ArvadosFileReader from . import arvados_testutil as tutil @@ -30,16 +23,19 @@ class ArvadosFileWriterTestCase(unittest.TestCase): def __init__(self, blocks): self.blocks = blocks self.requests = [] - def get(self, locator, num_retries=0): + self.num_prefetch_threads = 1 + def get(self, locator, num_retries=0, prefetch=False): self.requests.append(locator) return self.blocks.get(locator) def get_from_cache(self, locator): self.requests.append(locator) return self.blocks.get(locator) - def put(self, data, num_retries=None, copies=None): + def put(self, data, num_retries=None, copies=None, classes=[]): pdh = tutil.str_keep_locator(data) self.blocks[pdh] = bytes(data) return pdh + def block_prefetch(self, loc): + self.requests.append(loc) class MockApi(object): def __init__(self, b, r): @@ -173,8 +169,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) - - def test_write_to_end(self): keep = ArvadosFileWriterTestCase.MockKeep({ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", @@ -366,7 +360,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) - def test_large_write(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) @@ -400,7 +393,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n") - def test_sparse_write2(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) @@ -417,12 +409,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n") - def test_sparse_write3(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]: - with Collection() as c: + with Collection(api_client=api, keep_client=keep) as c: writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 0) @@ -437,7 +428,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]: - with Collection() as c: + with Collection(api_client=api, keep_client=keep) as c: writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 0) @@ -448,7 +439,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): writer.seek(0) self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444") - def test_rewrite_on_empty_file(self): keep = ArvadosFileWriterTestCase.MockKeep({}) with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', @@ -636,6 +626,8 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): def __init__(self, blocks, nocache): self.blocks = blocks self.nocache = nocache + self._keep = ArvadosFileWriterTestCase.MockKeep({}) + self.prefetch_lookahead = 0 def block_prefetch(self, loc): pass @@ -697,8 +689,60 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase): with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: r = c.open("count.txt", "rb") self.assertEqual(b"0123", r.read(4)) - self.assertIn("2e9ec317e197819358fbc43afca7d837+8", keep.requests) - self.assertIn("e8dc4081b13434b45189a720b77b6818+8", keep.requests) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8"], keep.requests) + + def test_prefetch_disabled(self): + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + keep.num_prefetch_threads = 0 + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + self.assertEqual(b"0123", r.read(4)) + + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8"], keep.requests) + + def test_prefetch_first_read_only(self): + # test behavior that prefetch only happens every 128 reads + # check that it doesn't make a prefetch request on the second read + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + self.assertEqual(b"0123", r.read(4)) + self.assertEqual(b"45", r.read(2)) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8", + "2e9ec317e197819358fbc43afca7d837+8"], keep.requests) + self.assertEqual(3, len(keep.requests)) + + def test_prefetch_again(self): + # test behavior that prefetch only happens every 128 reads + # check that it does make another prefetch request after 128 reads + keep = ArvadosFileWriterTestCase.MockKeep({ + "2e9ec317e197819358fbc43afca7d837+8": b"01234567", + "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh", + }) + with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c: + r = c.open("count.txt", "rb") + for i in range(0, 129): + r.seek(0) + self.assertEqual(b"0123", r.read(4)) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8", + "2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8"], keep.requests[0:4]) + self.assertEqual(["2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8", + "2e9ec317e197819358fbc43afca7d837+8", + "e8dc4081b13434b45189a720b77b6818+8"], keep.requests[127:131]) + # gets the 1st block 129 times from keep (cache), + # and the 2nd block twice to get 131 requests + self.assertEqual(131, len(keep.requests)) def test__eq__from_manifest(self): with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1: @@ -876,7 +920,6 @@ class BlockManagerTest(unittest.TestCase): blockmanager.commit_bufferblock(bufferblock, True) self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED) - def test_bufferblock_commit_with_error(self): mockkeep = mock.MagicMock() mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")