X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1e3f8ceebd90058e902494fae84b1fd57ac6693b..97e40209600c094eac15085627e49da52ab0f517:/sdk/python/arvados/commands/get.py diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py index b866bf75e2..b37a8477ac 100755 --- a/sdk/python/arvados/commands/get.py +++ b/sdk/python/arvados/commands/get.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -6,6 +6,7 @@ import argparse import hashlib import os +import pathlib import re import string import sys @@ -17,7 +18,6 @@ import arvados.util as util from arvados._version import __version__ -api_client = None logger = logging.getLogger('arvados.arv-get') parser = argparse.ArgumentParser( @@ -81,6 +81,10 @@ Overwrite existing files while writing. The default behavior is to refuse to write *anything* if any of the output files already exist. As a special case, -f is not needed to write to stdout. """) +group.add_argument('-v', action='count', default=0, + help=""" +Once for verbose mode, twice for debug mode. +""") group.add_argument('--skip-existing', action='store_true', help=""" Skip files that already exist. The default behavior is to refuse to @@ -94,6 +98,15 @@ When getting a collection manifest, strip its access tokens before writing it. """) +parser.add_argument('--threads', type=int, metavar='N', default=4, + help=""" +Set the number of download threads to be used. Take into account that +using lots of threads will increase the RAM requirements. Default is +to use 4 threads. +On high latency installations, using a greater number will improve +overall throughput. +""") + def parse_arguments(arguments, stdout, stderr): args = parser.parse_args(arguments) @@ -133,15 +146,17 @@ def parse_arguments(arguments, stdout, stderr): return args def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): - global api_client - if stdout is sys.stdout and hasattr(stdout, 'buffer'): # in Python 3, write to stdout as binary stdout = stdout.buffer args = parse_arguments(arguments, stdout, stderr) - if api_client is None: - api_client = arvados.api('v1') + logger.setLevel(logging.WARNING - 10 * args.v) + + request_id = arvados.util.new_request_id() + logger.info('X-Request-Id: '+request_id) + + api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries) r = re.search(r'^(.*?)(/.*)?$', args.locator) col_loc = r.group(1) @@ -157,14 +172,15 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): open_flags |= os.O_EXCL try: if args.destination == "-": - write_block_or_manifest(dest=stdout, src=col_loc, - api_client=api_client, args=args) + write_block_or_manifest( + dest=stdout, src=col_loc, + api_client=api_client, args=args) else: out_fd = os.open(args.destination, open_flags) with os.fdopen(out_fd, 'wb') as out_file: - write_block_or_manifest(dest=out_file, - src=col_loc, api_client=api_client, - args=args) + write_block_or_manifest( + dest=out_file, src=col_loc, + api_client=api_client, args=args) except (IOError, OSError) as error: logger.error("can't write to '{}': {}".format(args.destination, error)) return 1 @@ -180,7 +196,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): return 0 try: - reader = arvados.CollectionReader(col_loc, num_retries=args.retries) + reader = arvados.CollectionReader( + col_loc, api_client=api_client, num_retries=args.retries, + keep_client=arvados.keep.KeepClient(block_cache=arvados.keep.KeepBlockCache((args.threads+1)*64 * 1024 * 1024), num_prefetch_threads=args.threads)) except Exception as error: logger.error("failed to read collection: {}".format(error)) return 1 @@ -244,7 +262,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): logger.error('Local file %s already exists.' % (outfilename,)) return 1 if args.r: - arvados.util.mkdir_dash_p(os.path.dirname(outfilename)) + pathlib.Path(outfilename).parent.mkdir(parents=True, exist_ok=True) try: outfile = open(outfilename, 'wb') except Exception as error: @@ -268,7 +286,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): if todo_bytes==0 else 100.0*out_bytes/todo_bytes))) elif args.batch_progress: - stderr.write('%s %d read %d total\n' % + stderr.write('%s %d read %d total %d\n' % (sys.argv[0], os.getpid(), out_bytes, todo_bytes)) if digestor: @@ -301,9 +319,10 @@ def files_in_collection(c): def write_block_or_manifest(dest, src, api_client, args): if '+A' in src: # block locator - kc = KeepClient(api_client=api_client) + kc = arvados.keep.KeepClient(api_client=api_client) dest.write(kc.get(src, num_retries=args.retries)) else: # collection UUID or portable data hash - reader = arvados.CollectionReader(src, num_retries=args.retries) + reader = arvados.CollectionReader( + src, api_client=api_client, num_retries=args.retries) dest.write(reader.manifest_text(strip=args.strip_manifest).encode())