closes #7658
[arvados.git] / sdk / python / tests / test_arvfile.py
index c5dbb1692aee231a8336f2f3b423e6d1e25bc2f5..ea8661437c04f3c0d2dec8d4a01c0c8176a2e87c 100644 (file)
@@ -6,7 +6,7 @@ import io
 import mock
 import os
 import unittest
-import hashlib
+import time
 
 import arvados
 from arvados._ranges import Range
@@ -29,7 +29,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.requests.append(locator)
             return self.blocks.get(locator)
         def put(self, data, num_retries=None):
-            pdh = "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))
+            pdh = tutil.str_keep_locator(data)
             self.blocks[pdh] = str(data)
             return pdh
 
@@ -77,11 +77,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.seek(0, os.SEEK_SET)
             self.assertEqual("01234567", writer.read(12))
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_truncate")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_write_to_end(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -104,13 +104,13 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.seek(5, os.SEEK_SET)
             self.assertEqual("56789foo", writer.read(8))
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
 
             c.save_new("test_append")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
 
 
@@ -228,16 +228,33 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
-            text = ''.join(["0123456789" for a in xrange(0, 100)])
+            text = "0123456789" * 100
             for b in xrange(0, 100000):
                 writer.write(text)
             self.assertEqual(writer.size(), 100000000)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_write_large")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
+
+
+    def test_large_write(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({}, {})
+        with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 0)
+
+            text = "0123456789"
+            writer.write(text)
+            text = "0123456789" * 9999999
+            writer.write(text)
+            self.assertEqual(writer.size(), 100000000)
+
+            self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
 
     def test_rewrite_on_empty_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -309,11 +326,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 100000000)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_write_large")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_create(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -327,12 +344,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("01234567")
             self.assertEqual(writer.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
             c.save_new("test_create")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
 
 
@@ -346,7 +363,9 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.assertIsNone(c.api_response())
             writer = c.open("foo/bar/count.txt", "w+")
             writer.write("01234567")
+            self.assertFalse(c.committed())
             c.save_new("test_create")
+            self.assertTrue(c.committed())
             self.assertEqual(c.api_response(), api.response)
 
     def test_overwrite(self):
@@ -362,11 +381,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("01234567")
             self.assertEqual(writer.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_overwrite")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_file_not_found(self):
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
@@ -392,12 +411,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.assertEqual(w1.size(), 8)
             self.assertEqual(w2.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
             c.save_new("test_create_multiple")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
 
 
@@ -433,7 +452,7 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
         n = 0
         blocks = {}
         for d in ['01234', '34567', '67890']:
-            loc = '{}+{}'.format(hashlib.md5(d).hexdigest(), len(d))
+            loc = tutil.str_keep_locator(d)
             blocks[loc] = d
             stream.append(Range(loc, n, len(d)))
             n += len(d)
@@ -541,81 +560,99 @@ class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
 class BlockManagerTest(unittest.TestCase):
     def test_bufferblock_append(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
 
-        bufferblock.append("bar")
+            bufferblock.append("bar")
 
-        self.assertEqual(bufferblock.size(), 6)
-        self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
-        self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
+            self.assertEqual(bufferblock.size(), 6)
+            self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
+            self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
 
-        bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
-        with self.assertRaises(arvados.errors.AssertionError):
-            bufferblock.append("bar")
+            bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
+            with self.assertRaises(arvados.errors.AssertionError):
+                bufferblock.append("bar")
 
     def test_bufferblock_dup(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
-        bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
 
-        bufferblock2 = blockmanager.dup_block(bufferblock, None)
-        self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
+            bufferblock2 = blockmanager.dup_block(bufferblock, None)
+            self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
 
-        bufferblock2.append("bar")
+            bufferblock2.append("bar")
 
-        self.assertEqual(bufferblock2.size(), 6)
-        self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
-        self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
+            self.assertEqual(bufferblock2.size(), 6)
+            self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
+            self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
 
     def test_bufferblock_get(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
-        self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
+            self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
+            self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
 
     def test_bufferblock_commit(self):
         mockkeep = mock.MagicMock()
-        blockmanager = arvados.arvfile._BlockManager(mockkeep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.owner = mock.MagicMock()
-        bufferblock.owner.flush.side_effect = lambda x: blockmanager.commit_bufferblock(bufferblock, False)
-        bufferblock.append("foo")
-        blockmanager.commit_all()
-        self.assertTrue(bufferblock.owner.flush.called)
-        self.assertTrue(mockkeep.put.called)
-        self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
-        self.assertIsNone(bufferblock.buffer_view)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.owner = mock.MagicMock()
+            def flush(sync=None):
+                blockmanager.commit_bufferblock(bufferblock, sync)
+            bufferblock.owner.flush.side_effect = flush
+            bufferblock.append("foo")
+            blockmanager.commit_all()
+            self.assertTrue(bufferblock.owner.flush.called)
+            self.assertTrue(mockkeep.put.called)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
+            self.assertIsNone(bufferblock.buffer_view)
+
+    def test_bufferblock_commit_pending(self):
+        # Test for bug #7225
+        mockkeep = mock.MagicMock()
+        mockkeep.put.side_effect = lambda x: time.sleep(1)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
+
+            blockmanager.commit_bufferblock(bufferblock, False)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
+
+            blockmanager.commit_bufferblock(bufferblock, True)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
 
 
     def test_bufferblock_commit_with_error(self):
         mockkeep = mock.MagicMock()
         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
-        blockmanager = arvados.arvfile._BlockManager(mockkeep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.owner = mock.MagicMock()
-        bufferblock.owner.flush.side_effect = lambda x: blockmanager.commit_bufferblock(bufferblock, False)
-        bufferblock.append("foo")
-        with self.assertRaises(arvados.errors.KeepWriteError) as err:
-            blockmanager.commit_all()
-        self.assertTrue(bufferblock.owner.flush.called)
-        self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
-        self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.owner = mock.MagicMock()
+            def flush(sync=None):
+                blockmanager.commit_bufferblock(bufferblock, sync)
+            bufferblock.owner.flush.side_effect = flush
+            bufferblock.append("foo")
+            with self.assertRaises(arvados.errors.KeepWriteError) as err:
+                blockmanager.commit_all()
+            self.assertTrue(bufferblock.owner.flush.called)
+            self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)