# Logger for this script, registered under the 'arvados.test_collection_create'
# name. The level is INFO, so logger.info() messages are emitted while
# logger.debug() messages are suppressed unless the level is lowered elsewhere.
logger = logging.getLogger('arvados.test_collection_create')
logger.setLevel(logging.INFO)
+max_manifest_size = 127*1024*1024
+
# Option parser for this script's command-line flags. add_help=False keeps
# argparse from registering its own -h/--help option on this parser.
# NOTE(review): presumably this parser is used as a parent of another parser
# that supplies --help -- confirm against the rest of the file.
opts = argparse.ArgumentParser(add_help=False)
opts.add_argument('--min-files', type=int, default=30000, help="""
Minimum number of files on each directory. Default: 30000.
current_size += len(current_stream)
streams = [current_stream]
- if current_size >= (128 * 1024 * 1024):
+ if current_size >= max_manifest_size:
logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
elif depth == 0:
logger.debug("Finished stream {}".format(base_stream_name))
substreams = create_substreams(depth-1, stream_name, max_filesize,
data_loc, args, current_size)
current_size += sum([len(x) for x in substreams])
- if current_size >= (128 * 1024 * 1024):
+ if current_size >= max_manifest_size:
break
streams.extend(substreams)
return streams
'.', max_filesize, data_loc, args)
manifest = ''
for s in streams:
- if len(manifest)+len(s) > (1024*1024*128)-2:
+ if len(manifest)+len(s) > max_manifest_size:
logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0]))
break
manifest += s + '\n'
try:
- coll = api.collections().create(body={
- "ensure_unique_name": True,
- "collection": {
- "name": get_random_name(False),
+ coll_name = get_random_name(False)
+ coll = api.collections().create(
+ body={"collection": {
+ "name": coll_name,
"manifest_text": manifest
},
}).execute()
except:
- logger.info("ERROR trying manifest:\n'{}...'\nSize: {}".format(manifest[0:1024], len(manifest)))
+ logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest)))
raise
logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
return 0