X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4a8883858028d4068b52710acd9c349108b6fc7c..348ef77de2aac95cdaff388da9602362ae269126:/sdk/python/tests/test_arvfile.py diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py index e0f6596741..0b8e7c8f8b 100644 --- a/sdk/python/tests/test_arvfile.py +++ b/sdk/python/tests/test_arvfile.py @@ -1,12 +1,13 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + from __future__ import absolute_import from builtins import hex from builtins import str from builtins import range from builtins import object -import bz2 import datetime -import gzip -import io import mock import os import unittest @@ -15,7 +16,7 @@ import time import arvados from arvados._ranges import Range from arvados.keep import KeepLocator -from arvados.collection import Collection, CollectionReader +from arvados.collection import Collection from arvados.arvfile import ArvadosFile, ArvadosFileReader from . import arvados_testutil as tutil @@ -32,7 +33,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase): def get_from_cache(self, locator): self.requests.append(locator) return self.blocks.get(locator) - def put(self, data, num_retries=None, copies=None): + def put(self, data, num_retries=None, copies=None, classes=[]): pdh = tutil.str_keep_locator(data) self.blocks[pdh] = bytes(data) return pdh @@ -79,7 +80,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase): "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52", }) with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', - api_client=api, keep_client=keep) as c: + api_client=api, keep_client=keep) as c: writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 10) self.assertEqual(b"0123456789", writer.read(12)) @@ -99,6 +100,76 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) + + def test_truncate2(self): + keep = ArvadosFileWriterTestCase.MockKeep({ + "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", + }) + api = ArvadosFileWriterTestCase.MockApi({ + "name": "test_truncate2", + "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n", + "replication_desired": None, + }, { + "uuid": "zzzzz-4zz18-mockcollection0", + "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 7f614da9329cd3aebf59b91aadc30bf0+67108864 0:12:count.txt\n", + "portable_data_hash": "272da898abdf86ddc71994835e3155f8+95", + }) + with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', + api_client=api, keep_client=keep) as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 10) + self.assertEqual(b"0123456789", writer.read(12)) + + # extend file size + writer.truncate(12) + + self.assertEqual(writer.size(), 12) + writer.seek(0, os.SEEK_SET) + self.assertEqual(b"0123456789\x00\x00", writer.read(12)) + + self.assertIsNone(c.manifest_locator()) + self.assertTrue(c.modified()) + c.save_new("test_truncate2") + self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) + self.assertFalse(c.modified()) + + def test_truncate3(self): + keep = ArvadosFileWriterTestCase.MockKeep({ + "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", + "a925576942e94b2ef57a066101b48876+10": b"abcdefghij", + }) + api = ArvadosFileWriterTestCase.MockApi({ + "name": "test_truncate", + "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n", + "replication_desired": None, + }, { + "uuid": "zzzzz-4zz18-mockcollection0", + "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n", + "portable_data_hash": "7fcd0eaac3aad4c31a6a0e756475da92+52", + }) + with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n', + api_client=api, keep_client=keep) as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 20) + self.assertEqual(b"0123456789ab", writer.read(12)) + self.assertEqual(12, writer.tell()) + + writer.truncate(8) + + # Make sure reading off the end doesn't break + self.assertEqual(12, writer.tell()) + self.assertEqual(b"", writer.read(12)) + + self.assertEqual(writer.size(), 8) + self.assertEqual(2, writer.seek(-10, os.SEEK_CUR)) + self.assertEqual(b"234567", writer.read(12)) + + self.assertIsNone(c.manifest_locator()) + self.assertTrue(c.modified()) + c.save_new("test_truncate") + self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) + self.assertFalse(c.modified()) + def test_write_to_end(self): keep = ArvadosFileWriterTestCase.MockKeep({ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", @@ -117,7 +188,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase): writer = c.open("count.txt", "rb+") self.assertEqual(writer.size(), 10) - writer.seek(5, os.SEEK_SET) + self.assertEqual(5, writer.seek(5, os.SEEK_SET)) self.assertEqual(b"56789", writer.read(8)) writer.seek(10, os.SEEK_SET) @@ -141,19 +212,41 @@ class ArvadosFileWriterTestCase(unittest.TestCase): keep = ArvadosFileWriterTestCase.MockKeep({ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789", }) - c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep) - writer = c.open("count.txt", "ab+") - self.assertEqual(writer.read(20), b"0123456789") - writer.seek(0, os.SEEK_SET) + for (mode, convert) in ( + ('a+', lambda data: data.decode(encoding='utf-8')), + ('at+', lambda data: data.decode(encoding='utf-8')), + ('ab+', lambda data: data)): + c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep) + writer = c.open("count.txt", mode) + self.assertEqual(writer.read(20), convert(b"0123456789")) + + writer.seek(0, os.SEEK_SET) + writer.write(convert(b"hello")) + self.assertEqual(writer.read(), convert(b"")) + if 'b' in mode: + writer.seek(-5, os.SEEK_CUR) + self.assertEqual(writer.read(3), convert(b"hel")) + self.assertEqual(writer.read(), convert(b"lo")) + else: + with self.assertRaises(IOError): + writer.seek(-5, os.SEEK_CUR) + with self.assertRaises(IOError): + writer.seek(-3, os.SEEK_END) + writer.seek(0, os.SEEK_SET) + writer.read(7) + self.assertEqual(7, writer.tell()) + self.assertEqual(7, writer.seek(7, os.SEEK_SET)) - writer.write("hello") - self.assertEqual(writer.read(20), b"0123456789hello") - writer.seek(0, os.SEEK_SET) + writer.seek(0, os.SEEK_SET) + self.assertEqual(writer.read(), convert(b"0123456789hello")) - writer.write("world") - self.assertEqual(writer.read(20), b"0123456789helloworld") + writer.seek(0) + writer.write(convert(b"world")) + self.assertEqual(writer.read(), convert(b"")) + writer.seek(0) + self.assertEqual(writer.read(), convert(b"0123456789helloworld")) - self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text()) + self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text()) def test_write_at_beginning(self): keep = ArvadosFileWriterTestCase.MockKeep({ @@ -268,7 +361,6 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator()) self.assertFalse(c.modified()) - def test_large_write(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({}, {}) @@ -285,6 +377,69 @@ class ArvadosFileWriterTestCase(unittest.TestCase): self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n") + def test_sparse_write(self): + keep = ArvadosFileWriterTestCase.MockKeep({}) + api = ArvadosFileWriterTestCase.MockApi({}, {}) + with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', + api_client=api, keep_client=keep) as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 0) + + text = b"0123456789" + writer.seek(2) + writer.write(text) + self.assertEqual(writer.size(), 12) + writer.seek(0, os.SEEK_SET) + self.assertEqual(writer.read(), b"\x00\x00"+text) + + self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:2:count.txt 67108864:10:count.txt\n") + + def test_sparse_write2(self): + keep = ArvadosFileWriterTestCase.MockKeep({}) + api = ArvadosFileWriterTestCase.MockApi({}, {}) + with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', + api_client=api, keep_client=keep) as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 0) + + text = "0123456789" + writer.seek((arvados.config.KEEP_BLOCK_SIZE*2) + 2) + writer.write(text) + self.assertEqual(writer.size(), (arvados.config.KEEP_BLOCK_SIZE*2) + 12) + writer.seek(0, os.SEEK_SET) + + self.assertEqual(c.manifest_text(), ". 7f614da9329cd3aebf59b91aadc30bf0+67108864 781e5e245d69b566979b86e28d23f2c7+10 0:67108864:count.txt 0:67108864:count.txt 0:2:count.txt 67108864:10:count.txt\n") + + def test_sparse_write3(self): + keep = ArvadosFileWriterTestCase.MockKeep({}) + api = ArvadosFileWriterTestCase.MockApi({}, {}) + for r in [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0], [3, 2, 0, 4, 1]]: + with Collection() as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 0) + + for i in r: + w = ("%s" % i) * 10 + writer.seek(i*10) + writer.write(w.encode()) + writer.seek(0) + self.assertEqual(writer.read(), b"00000000001111111111222222222233333333334444444444") + + def test_sparse_write4(self): + keep = ArvadosFileWriterTestCase.MockKeep({}) + api = ArvadosFileWriterTestCase.MockApi({}, {}) + for r in [[0, 1, 2, 4], [4, 2, 1, 0], [2, 0, 4, 1]]: + with Collection() as c: + writer = c.open("count.txt", "rb+") + self.assertEqual(writer.size(), 0) + + for i in r: + w = ("%s" % i) * 10 + writer.seek(i*10) + writer.write(w.encode()) + writer.seek(0) + self.assertEqual(writer.read(), b"000000000011111111112222222222\x00\x00\x00\x00\x00\x00\x00\x00\x00\x004444444444") + def test_rewrite_on_empty_file(self): keep = ArvadosFileWriterTestCase.MockKeep({}) with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', @@ -346,10 +501,10 @@ class ArvadosFileWriterTestCase(unittest.TestCase): def test_write_large_rewrite(self): keep = ArvadosFileWriterTestCase.MockKeep({}) api = ArvadosFileWriterTestCase.MockApi({"name":"test_write_large", - "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n", + "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n", "replication_desired":None}, {"uuid":"zzzzz-4zz18-mockcollection0", - "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n", + "manifest_text": ". 3dc0d4bc21f48060bedcb2c91af4f906+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 0:3:count.txt 32892006:67107997:count.txt 3:32892000:count.txt\n", "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"}) with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt', api_client=api, keep_client=keep) as c: @@ -687,7 +842,7 @@ class BlockManagerTest(unittest.TestCase): mockkeep = mock.MagicMock() with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() - bufferblock.owner = mock.MagicMock() + bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile) def flush(sync=None): blockmanager.commit_bufferblock(bufferblock, sync) bufferblock.owner.flush.side_effect = flush @@ -701,7 +856,7 @@ class BlockManagerTest(unittest.TestCase): def test_bufferblock_commit_pending(self): # Test for bug #7225 mockkeep = mock.MagicMock() - mockkeep.put.side_effect = lambda x: time.sleep(1) + mockkeep.put.side_effect = lambda *args, **kwargs: time.sleep(1) with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() bufferblock.append("foo") @@ -712,13 +867,12 @@ class BlockManagerTest(unittest.TestCase): blockmanager.commit_bufferblock(bufferblock, True) self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED) - def test_bufferblock_commit_with_error(self): mockkeep = mock.MagicMock() mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail") with arvados.arvfile._BlockManager(mockkeep) as blockmanager: bufferblock = blockmanager.alloc_bufferblock() - bufferblock.owner = mock.MagicMock() + bufferblock.owner = mock.MagicMock(spec=arvados.arvfile.ArvadosFile) def flush(sync=None): blockmanager.commit_bufferblock(bufferblock, sync) bufferblock.owner.flush.side_effect = flush