"""
DEFAULT_PUT_THREADS = 2
- DEFAULT_GET_THREADS = 4
+ DEFAULT_GET_THREADS = 2
- def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None):
+ def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None, get_threads=None):
"""keep: KeepClient object to use"""
self._keep = keep
self._bufferblocks = collections.OrderedDict()
self.lock = threading.Lock()
self.prefetch_enabled = True
self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
- self.num_get_threads = _BlockManager.DEFAULT_GET_THREADS
+ self.num_get_threads = get_threads or _BlockManager.DEFAULT_GET_THREADS
self.copies = copies
self.storage_classes = storage_classes_func or (lambda: [])
self._pending_write_size = 0
if size == 0 or offset >= self.size():
return b''
readsegs = locators_and_ranges(self._segments, offset, size)
- prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE*4, limit=32)
+ prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager().num_get_threads, limit=32)
locs = set()
data = []
self.parent._my_block_manager().block_prefetch(lr.locator)
locs.add(lr.locator)
- if len(data) == 1:
- return data[0]
- else:
- return b''.join(data)
+ return b''.join(data)
@must_be_writable
@synchronized
block_manager=None,
replication_desired=None,
storage_classes_desired=None,
- put_threads=None):
+ put_threads=None,
+ get_threads=None):
"""Collection constructor.
:manifest_locator_or_text:
self.replication_desired = replication_desired
self._storage_classes_desired = storage_classes_desired
self.put_threads = put_threads
+ self.get_threads = get_threads
if apiconfig:
self._config = apiconfig
copies = (self.replication_desired or
self._my_api()._rootDesc.get('defaultCollectionReplication',
2))
- self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries, storage_classes_func=self.storage_classes_desired)
+ self._block_manager = _BlockManager(self._my_keep(),
+ copies=copies,
+ put_threads=self.put_threads,
+ num_retries=self.num_retries,
+ storage_classes_func=self.storage_classes_desired,
+ get_threads=self.get_threads,)
return self._block_manager
def _remember_api_response(self, response):
it.
""")
+parser.add_argument('--threads', type=int, metavar='N', default=2,
+ help="""
+Set the number of download threads to be used. Take into account that
+using lots of threads will increase the RAM requirements. Default is
+to use 2 threads.
+On high latency installations, using a greater number will improve
+overall throughput.
+""")
+
def parse_arguments(arguments, stdout, stderr):
args = parser.parse_args(arguments)
try:
reader = arvados.CollectionReader(
- col_loc, api_client=api_client, num_retries=args.retries)
+ col_loc, api_client=api_client, num_retries=args.retries,
+ keep_client=arvados.keep.KeepClient(block_cache=arvados.keep.KeepBlockCache((args.threads+1)*64 * 1024 * 1024)),
+ get_threads=args.threads)
except Exception as error:
logger.error("failed to read collection: {}".format(error))
return 1
class KeepBlockCache(object):
# Default RAM cache is 256MiB
- def __init__(self, cache_max=(1024 * 1024 * 1024)):
+ def __init__(self, cache_max=(256 * 1024 * 1024)):
self.cache_max = cache_max
self._cache = []
self._cache_lock = threading.Lock()
return True
if os.path.exists(os.path.join(self.local_store, locator.md5sum)):
return True
-