X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aafb1a30ab96bb6f88d1988b9eef5c8127887175..8077e894a1f48e526c05ccf5cff22cfaae85ba69:/sdk/python/test_collections.py diff --git a/sdk/python/test_collections.py b/sdk/python/test_collections.py index 38e3cb1531..913c888830 100644 --- a/sdk/python/test_collections.py +++ b/sdk/python/test_collections.py @@ -5,6 +5,9 @@ import unittest import arvados import os +import bz2 +import sys +import subprocess class KeepLocalStoreTest(unittest.TestCase): def setUp(self): @@ -31,7 +34,7 @@ class LocalCollectionWriterTest(unittest.TestCase): cw.set_current_file_name('baz.txt') hash = cw.finish() self.assertEqual(hash, - 'a4d26dddc10ad8b5eb39347c916de16c+112', + '23ca013983d6239e98931cc779e68426+114', 'resulting manifest hash is not what I expected') class LocalCollectionReaderTest(unittest.TestCase): @@ -39,14 +42,14 @@ class LocalCollectionReaderTest(unittest.TestCase): os.environ['KEEP_LOCAL_STORE'] = '/tmp' LocalCollectionWriterTest().runTest() def runTest(self): - cr = arvados.CollectionReader('a4d26dddc10ad8b5eb39347c916de16c+112') + cr = arvados.CollectionReader('23ca013983d6239e98931cc779e68426+114') got = [] for s in cr.all_streams(): for f in s.all_files(): got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]] expected = [[3, '.', 'foo.txt', 'foo'], [3, '.', 'bar.txt', 'bar'], - [3, 'baz', 'baz.txt', 'baz']] + [3, './baz', 'baz.txt', 'baz']] self.assertEqual(got, expected, 'resulting file list is not what I expected') @@ -69,22 +72,42 @@ class LocalCollectionManifestSubsetTest(unittest.TestCase): os.environ['KEEP_LOCAL_STORE'] = '/tmp' LocalCollectionWriterTest().runTest() def runTest(self): - cr = arvados.CollectionReader('a4d26dddc10ad8b5eb39347c916de16c+112') + self._runTest('23ca013983d6239e98931cc779e68426+114', + [[3, '.', 'foo.txt', 'foo'], + [3, '.', 'bar.txt', 'bar'], + [3, './baz', 'baz.txt', 'baz']]) + self._runTest((". %s %s 0:3:foo.txt 3:3:bar.txt\n" % + (arvados.Keep.put("foo"), + arvados.Keep.put("bar"))), + [[3, '.', 'foo.txt', 'foo'], + [3, '.', 'bar.txt', 'bar']]) + self._runTest((". %s %s 0:2:fo.txt 2:4:obar.txt\n" % + (arvados.Keep.put("foo"), + arvados.Keep.put("bar"))), + [[2, '.', 'fo.txt', 'fo'], + [4, '.', 'obar.txt', 'obar']]) + self._runTest((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" % + (arvados.Keep.put("foo"), + arvados.Keep.put("bar"))), + [[2, '.', 'fo.txt', 'fo'], + [0, '.', 'zero.txt', ''], + [2, '.', 'ob.txt', 'ob'], + [2, '.', 'ar.txt', 'ar']]) + def _runTest(self, collection, expected): + cr = arvados.CollectionReader(collection) manifest_subsets = [] for s in cr.all_streams(): for f in s.all_files(): manifest_subsets += [f.as_manifest()] - got = [] + expect_i = 0 for m in manifest_subsets: cr = arvados.CollectionReader(m) for f in cr.all_files(): - got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]] - expected = [[3, '.', 'foo.txt', 'foo'], - [3, '.', 'bar.txt', 'bar'], - [3, 'baz', 'baz.txt', 'baz']] - self.assertEqual(got, - expected, - 'all_files|as_manifest did not preserve manifest contents') + got = [f.size(), f.stream_name(), f.name(), "".join(f.readall(2**26))] + self.assertEqual(got, + expected[expect_i], + 'all_files|as_manifest did not preserve manifest contents: got %s expected %s' % (got, expected[expect_i])) + expect_i += 1 class LocalCollectionReadlineTest(unittest.TestCase): def setUp(self): @@ -106,3 +129,79 @@ class LocalCollectionReadlineTest(unittest.TestCase): ["\n", "a\n", "bcd\n", "\n", "efg\n", "z"]) self._runTest("ab\ncd\n", ["ab\n", "cd\n"]) + +class LocalCollectionEmptyFileTest(unittest.TestCase): + def setUp(self): + os.environ['KEEP_LOCAL_STORE'] = '/tmp' + def runTest(self): + cw = arvados.CollectionWriter() + cw.start_new_file('zero.txt') + cw.write('') + self.check_manifest_file_sizes(cw.manifest_text(), [0]) + cw = arvados.CollectionWriter() + cw.start_new_file('zero.txt') + cw.write('') + cw.start_new_file('one.txt') + cw.write('1') + cw.start_new_stream('foo') + cw.start_new_file('zero.txt') + cw.write('') + self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0]) + def check_manifest_file_sizes(self, manifest_text, expect_sizes): + cr = arvados.CollectionReader(manifest_text) + got_sizes = [] + for f in cr.all_files(): + got_sizes += [f.size()] + self.assertEqual(got_sizes, expect_sizes, "got wrong file sizes %s, expected %s" % (got_sizes, expect_sizes)) + +class LocalCollectionBZ2DecompressionTest(unittest.TestCase): + def setUp(self): + os.environ['KEEP_LOCAL_STORE'] = '/tmp' + def runTest(self): + n_lines_in = 2**18 + data_in = "abc\n" + for x in xrange(0, 18): + data_in += data_in + compressed_data_in = bz2.compress(data_in) + cw = arvados.CollectionWriter() + cw.start_new_file('test.bz2') + cw.write(compressed_data_in) + bz2_manifest = cw.manifest_text() + + cr = arvados.CollectionReader(bz2_manifest) + got = 0 + for x in list(cr.all_files())[0].readlines(): + self.assertEqual(x, "abc\n", "decompression returned wrong data: %s" % x) + got += 1 + self.assertEqual(got, + n_lines_in, + "decompression returned %d lines instead of %d" % (got, n_lines_in)) + +class LocalCollectionGzipDecompressionTest(unittest.TestCase): + def setUp(self): + os.environ['KEEP_LOCAL_STORE'] = '/tmp' + def runTest(self): + n_lines_in = 2**18 + data_in = "abc\n" + for x in xrange(0, 18): + data_in += data_in + p = subprocess.Popen(["gzip", "-1cn"], + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False, close_fds=True) + compressed_data_in, stderrdata = p.communicate(data_in) + + cw = arvados.CollectionWriter() + cw.start_new_file('test.gz') + cw.write(compressed_data_in) + gzip_manifest = cw.manifest_text() + + cr = arvados.CollectionReader(gzip_manifest) + got = 0 + for x in list(cr.all_files())[0].readlines(): + self.assertEqual(x, "abc\n", "decompression returned wrong data: %s" % x) + got += 1 + self.assertEqual(got, + n_lines_in, + "decompression returned %d lines instead of %d" % (got, n_lines_in))