11684: When packing small blocks into one, save references of the files
[arvados.git] / sdk / python / tests / test_benchmark_collections.py
1 from __future__ import absolute_import
2 import arvados
3 import sys
4
5 from . import run_test_server
6 from . import arvados_testutil as tutil
7 from . import manifest_examples
8 from .performance.performance_profiler import profiled
9
class CollectionBenchmark(run_test_server.TestCaseWithServers,
                          tutil.ArvadosBaseTestCase,
                          manifest_examples.ManifestExamples):
    """Profile common Collection operations on a synthetic manifest.

    Each ``profile_*`` method is wrapped with ``@profiled`` (imported from
    ``.performance.performance_profiler``) and is driven by
    ``test_medium_sized_manifest``.
    """

    # Read by run_test_server.TestCaseWithServers; presumably an empty dict
    # requests the main server with default settings -- TODO confirm.
    MAIN_SERVER = {}
    # Passed as bytes_per_block when building the manifest, and used as the
    # length of the body handed to tutil.mock_keep_responses.
    TEST_BLOCK_SIZE = 0

    @classmethod
    def list_recursive(cls, coll, parent_name=None):
        """Yield the path of every non-iterable item reachable from coll.

        coll is the object to walk.  At the top level (parent_name is
        None) the path starts from coll.stream_name(); for nested items
        it is '<parent_name>/<coll.name>'.

        An item that cannot be iterated is treated as a leaf and its own
        path is yielded.  An iterable item contributes only the paths of
        its descendants, so an empty iterable yields nothing.
        """
        if parent_name is None:
            current_name = coll.stream_name()
        else:
            current_name = '{}/{}'.format(parent_name, coll.name)
        # Only the iterability probe is guarded.  A TypeError raised
        # while descending (in coll[name] or in a nested call) is a real
        # error and must propagate instead of being reported as a leaf.
        try:
            names = iter(coll)
        except TypeError:
            yield current_name
            return
        for name in names:
            # No "yield from": the file keeps Python 2 compatible syntax.
            for item in cls.list_recursive(coll[name], current_name):
                yield item

    @classmethod
    def setUpClass(cls):
        """Authorize as the 'active' user and build shared API/Keep clients."""
        super(CollectionBenchmark, cls).setUpClass()
        run_test_server.authorize_with('active')
        cls.api_client = arvados.api('v1')
        cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                             local_store=cls.local_store)

    @profiled
    def profile_new_collection_from_manifest(self, manifest_text):
        """Return a Collection constructed from manifest_text."""
        return arvados.collection.Collection(manifest_text)

    @profiled
    def profile_new_collection_from_server(self, uuid):
        """Return a Collection constructed from the locator uuid."""
        return arvados.collection.Collection(uuid)

    @profiled
    def profile_new_collection_copying_bytes_from_collection(self, src):
        """Copy every file in src into a new collection by read/write.

        Each source file is read in full and written to a file of the
        same path in the destination, which is then saved with
        save_new().  Keep responses are mocked for the whole operation.
        Returns None.
        """
        dst = arvados.collection.Collection()
        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
            for name in self.list_recursive(src):
                with src.open(name, 'rb') as srcfile, dst.open(name, 'wb') as dstfile:
                    dstfile.write(srcfile.read())
            dst.save_new()

    @profiled
    def profile_new_collection_copying_files_from_collection(self, src):
        """Copy every file in src into a new collection with dst.copy().

        Unlike the bytes variant, file contents are not read here; each
        path is handed to Collection.copy() with src as the source
        collection.  The destination is then saved with save_new().
        Keep responses are mocked for the whole operation.  Returns None.
        """
        dst = arvados.collection.Collection()
        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
            for name in self.list_recursive(src):
                dst.copy(name, name, src)
            dst.save_new()

    @profiled
    def profile_collection_list_files(self, coll):
        """Return the number of paths list_recursive() yields for coll."""
        return sum(1 for name in self.list_recursive(coll))

    def test_medium_sized_manifest(self):
        """Exercise manifest-handling code.

        Currently, this test puts undue emphasis on some code paths
        that don't reflect typical use because the contrived example
        manifest has some unusual characteristics:

        * Block size is zero.

        * Every block is identical, so block caching patterns are
          unrealistic.

        * Every file begins and ends at a block boundary.
        """
        specs = {
            'streams': 100,
            'files_per_stream': 100,
            'blocks_per_file': 20,
            'bytes_per_block': self.TEST_BLOCK_SIZE,
        }
        my_manifest = self.make_manifest(**specs)

        coll = self.profile_new_collection_from_manifest(my_manifest)

        # Save first so the collection can be re-loaded by its locator.
        coll.save_new()
        self.profile_new_collection_from_server(coll.manifest_locator())

        # Every generated file must be listed exactly once.
        num_items = self.profile_collection_list_files(coll)
        self.assertEqual(num_items, specs['streams'] * specs['files_per_stream'])

        self.profile_new_collection_copying_bytes_from_collection(coll)

        self.profile_new_collection_copying_files_from_collection(coll)