X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/eebeedb8c47e8dfdb39b322e0a0914811830d323..0dab89df8040f203a33bc1922df0ff893791def7:/tools/test-collection-create/test-collection-create.py diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py index 8031f29fb7..c8eae240d2 100644 --- a/tools/test-collection-create/test-collection-create.py +++ b/tools/test-collection-create/test-collection-create.py @@ -16,6 +16,8 @@ import arvados.collection logger = logging.getLogger('arvados.test_collection_create') logger.setLevel(logging.INFO) +max_manifest_size = 127*1024*1024 + opts = argparse.ArgumentParser(add_help=False) opts.add_argument('--min-files', type=int, default=30000, help=""" Minimum number of files on each directory. Default: 30000. @@ -29,6 +31,12 @@ Minimum depth for the created tree structure. Default: 0. opts.add_argument('--max-depth', type=int, default=0, help=""" Maximum depth for the created tree structure. Default: 0. """) +opts.add_argument('--min-subdirs', type=int, default=1, help=""" +Minimum number of subdirectories created at every depth level. Default: 1. +""") +opts.add_argument('--max-subdirs', type=int, default=10, help=""" +Maximum number of subdirectories created at every depth level. Default: 10. +""") opts.add_argument('--debug', action='store_true', default=False, help=""" Sets logging level to DEBUG. """) @@ -375,17 +383,17 @@ def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, cur current_size += len(current_stream) streams = [current_stream] - if current_size >= (128 * 1024 * 1024): + if current_size >= max_manifest_size: logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name)) elif depth == 0: logger.debug("Finished stream {}".format(base_stream_name)) else: - for _ in range(random.randint(1, 10)): + for _ in range(random.randint(args.min_subdirs, args.max_subdirs)): stream_name = base_stream_name+'/'+get_random_name(False) substreams = create_substreams(depth-1, stream_name, max_filesize, data_loc, args, current_size) current_size += sum([len(x) for x in substreams]) - if current_size >= (128 * 1024 * 1024): + if current_size >= max_manifest_size: break streams.extend(substreams) return streams @@ -400,11 +408,13 @@ def parse_arguments(arguments): arg_parser.error("--min-depth should be at least 0") if args.max_depth < 0 or args.max_depth < args.min_depth: arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth)) + if args.max_subdirs < args.min_subdirs: + arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs)) return args def main(arguments=None): args = parse_arguments(arguments) - logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={})...".format(args.min_files, args.max_files, args.min_depth, args.max_depth)) + logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs)) api = arvados.api('v1', timeout=5*60) max_filesize = 1024*1024 data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)]) @@ -413,20 +423,20 @@ def main(arguments=None): '.', max_filesize, data_loc, args) manifest = '' for s in streams: - if len(manifest)+len(s) > (1024*1024*128)-2: + if len(manifest)+len(s) > max_manifest_size: logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0])) break manifest += s + '\n' try: - coll = api.collections().create(body={ - "ensure_unique_name": True, - "collection": { - "name": get_random_name(False), + coll_name = get_random_name(False) + coll = api.collections().create( + body={"collection": { + "name": coll_name, "manifest_text": manifest }, }).execute() except: - logger.info("ERROR trying manifest:\n'{}...'\nSize: {}".format(manifest[0:1024], len(manifest))) + logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest))) raise logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest))) return 0