X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/47e59a35d5ed9b2cdb052894d741972324058505..3fa6aa4043286ad61e5f29c136d3cc2942e8750d:/sdk/python/arvados/commands/get.py diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py index 1e52714916..bb421def61 100755 --- a/sdk/python/arvados/commands/get.py +++ b/sdk/python/arvados/commands/get.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -17,7 +17,6 @@ import arvados.util as util from arvados._version import __version__ -api_client = None logger = logging.getLogger('arvados.arv-get') parser = argparse.ArgumentParser( @@ -98,6 +97,15 @@ When getting a collection manifest, strip its access tokens before writing it. """) +parser.add_argument('--threads', type=int, metavar='N', default=4, + help=""" +Set the number of download threads to be used. Take into account that +using lots of threads will increase the RAM requirements. Default is +to use 4 threads. +On high latency installations, using a greater number will improve +overall throughput. +""") + def parse_arguments(arguments, stdout, stderr): args = parser.parse_args(arguments) @@ -137,8 +145,6 @@ def parse_arguments(arguments, stdout, stderr): return args def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): - global api_client - if stdout is sys.stdout and hasattr(stdout, 'buffer'): # in Python 3, write to stdout as binary stdout = stdout.buffer @@ -149,8 +155,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): request_id = arvados.util.new_request_id() logger.info('X-Request-Id: '+request_id) - if api_client is None: - api_client = arvados.api('v1', request_id=request_id) + api_client = arvados.api('v1', request_id=request_id) r = re.search(r'^(.*?)(/.*)?$', args.locator) col_loc = r.group(1) @@ -191,7 +196,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): try: reader = arvados.CollectionReader( - col_loc, api_client=api_client, num_retries=args.retries) + col_loc, api_client=api_client, num_retries=args.retries, + keep_client=arvados.keep.KeepClient(block_cache=arvados.keep.KeepBlockCache((args.threads+1)*64 * 1024 * 1024)), + get_threads=args.threads) except Exception as error: logger.error("failed to read collection: {}".format(error)) return 1 @@ -279,7 +286,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): if todo_bytes==0 else 100.0*out_bytes/todo_bytes))) elif args.batch_progress: - stderr.write('%s %d read %d total\n' % + stderr.write('%s %d read %d total %d\n' % (sys.argv[0], os.getpid(), out_bytes, todo_bytes)) if digestor: