20318: Track estimated cache usage, and tidy more diligently.
[arvados.git] / sdk / python / tests / test_benchmark_collections.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from __future__ import absolute_import
6 import arvados
7 import sys
8
9 from . import run_test_server
10 from . import arvados_testutil as tutil
11 from . import manifest_examples
12 from .performance.performance_profiler import profiled
13
class CollectionBenchmark(run_test_server.TestCaseWithServers,
                          tutil.ArvadosBaseTestCase,
                          manifest_examples.ManifestExamples):
    """Profile common Collection operations on a generated manifest.

    Every ``profile_*`` method is wrapped with ``@profiled``;
    ``test_medium_sized_manifest`` builds the manifest and drives them
    in sequence.
    """

    # NOTE(review): presumably consumed by TestCaseWithServers to decide
    # which servers to start -- confirm against run_test_server.
    MAIN_SERVER = {}
    # Bytes per block in the generated manifest; also the length of the
    # body handed to the mocked Keep responses.
    TEST_BLOCK_SIZE = 0

    @classmethod
    def list_recursive(cls, coll, parent_name=None):
        """Yield the path of every non-iterable item found under ``coll``.

        The traversal is depth-first.  The root path is
        ``coll.stream_name()``; below the root, each path is
        ``'<parent path>/<item name>'``.

        :param coll: object to walk.  If it cannot be iterated it is
            treated as a leaf and its own path is yielded.
        :param parent_name: path of the enclosing object, or ``None``
            when ``coll`` is the root of the walk.
        """
        if parent_name is None:
            current_name = coll.stream_name()
        else:
            current_name = '{}/{}'.format(parent_name, coll.name)
        # Guard only the iterability probe.  Wrapping the whole traversal
        # would turn a TypeError raised deeper in the walk into a bogus
        # leaf entry for this (iterable) object and hide the real error.
        try:
            names = iter(coll)
        except TypeError:
            yield current_name
            return
        for name in names:
            for item in cls.list_recursive(coll[name], current_name):
                yield item

    @classmethod
    def setUpClass(cls):
        """Authorize as the 'active' user and create shared API/Keep clients."""
        super(CollectionBenchmark, cls).setUpClass()
        run_test_server.authorize_with('active')
        cls.api_client = arvados.api('v1')
        cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                             local_store=cls.local_store)

    @profiled
    def profile_new_collection_from_manifest(self, manifest_text):
        """Return a new Collection constructed from ``manifest_text``."""
        return arvados.collection.Collection(manifest_text)

    @profiled
    def profile_new_collection_from_server(self, uuid):
        """Return a new Collection constructed from the locator ``uuid``."""
        return arvados.collection.Collection(uuid)

    @profiled
    def profile_new_collection_copying_bytes_from_collection(self, src):
        """Copy every file in ``src`` into a new collection by read/write.

        Each file is opened in ``src``, read in full, and written to a
        same-named file in the new collection, which is then saved.
        Keep responses are mocked for the duration of the copy and save.
        """
        dst = arvados.collection.Collection()
        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
            for name in self.list_recursive(src):
                with src.open(name, 'rb') as srcfile, dst.open(name, 'wb') as dstfile:
                    dstfile.write(srcfile.read())
            dst.save_new()

    @profiled
    def profile_new_collection_copying_files_from_collection(self, src):
        """Copy every file in ``src`` into a new collection via ``copy()``.

        Unlike the byte-copying variant, this uses ``Collection.copy``
        for each path instead of reading and writing file contents.
        Keep responses are mocked for the duration of the copy and save.
        """
        dst = arvados.collection.Collection()
        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
            for name in self.list_recursive(src):
                dst.copy(name, name, src)
            dst.save_new()

    @profiled
    def profile_collection_list_files(self, coll):
        """Return the number of paths ``list_recursive`` yields for ``coll``."""
        return sum(1 for _ in self.list_recursive(coll))

    def test_medium_sized_manifest(self):
        """Exercise manifest-handling code.

        Currently, this test puts undue emphasis on some code paths
        that don't reflect typical use because the contrived example
        manifest has some unusual characteristics:

        * Block size is zero.

        * Every block is identical, so block caching patterns are
          unrealistic.

        * Every file begins and ends at a block boundary.
        """
        specs = {
            'streams': 100,
            'files_per_stream': 100,
            'blocks_per_file': 20,
            'bytes_per_block': self.TEST_BLOCK_SIZE,
        }
        my_manifest = self.make_manifest(**specs)

        coll = self.profile_new_collection_from_manifest(my_manifest)

        # Save first so the collection has a locator that can be used to
        # load it back from the server.
        coll.save_new()
        self.profile_new_collection_from_server(coll.manifest_locator())

        # One path is expected per generated file.
        num_items = self.profile_collection_list_files(coll)
        self.assertEqual(num_items, specs['streams'] * specs['files_per_stream'])

        self.profile_new_collection_copying_bytes_from_collection(coll)

        self.profile_new_collection_copying_files_from_collection(coll)