X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/03188ad6eb14ee3dcd6bdf74198624c9358936c5..add09f355520b2bd1214b8c74d5a8d6c8e76dc88:/sdk/python/arvados/commands/get.py diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py index e54fd87a19..eb68297625 100755 --- a/sdk/python/arvados/commands/get.py +++ b/sdk/python/arvados/commands/get.py @@ -1,4 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 import argparse import hashlib @@ -78,6 +81,10 @@ Overwrite existing files while writing. The default behavior is to refuse to write *anything* if any of the output files already exist. As a special case, -f is not needed to write to stdout. """) +group.add_argument('-v', action='count', default=0, + help=""" +Once for verbose mode, twice for debug mode. +""") group.add_argument('--skip-existing', action='store_true', help=""" Skip files that already exist. The default behavior is to refuse to @@ -87,7 +94,7 @@ skipped. """) group.add_argument('--strip-manifest', action='store_true', default=False, help=""" -When getting a collection manifest, strip its access tokens before writing +When getting a collection manifest, strip its access tokens before writing it. """) @@ -131,21 +138,25 @@ def parse_arguments(arguments, stdout, stderr): def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): global api_client - + + if stdout is sys.stdout and hasattr(stdout, 'buffer'): + # in Python 3, write to stdout as binary + stdout = stdout.buffer + args = parse_arguments(arguments, stdout, stderr) + logger.setLevel(logging.WARNING - 10 * args.v) + + request_id = arvados.util.new_request_id() + logger.info('X-Request-Id: '+request_id) + if api_client is None: - api_client = arvados.api('v1') + api_client = arvados.api('v1', request_id=request_id) r = re.search(r'^(.*?)(/.*)?$', args.locator) col_loc = r.group(1) get_prefix = r.group(2) if args.r and not get_prefix: get_prefix = os.sep - try: - reader = arvados.CollectionReader(col_loc, num_retries=args.retries) - except Exception as error: - logger.error("failed to read collection: {}".format(error)) - return 1 # User asked to download the collection's manifest if not get_prefix: @@ -155,19 +166,36 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): open_flags |= os.O_EXCL try: if args.destination == "-": - stdout.write(reader.manifest_text(strip=args.strip_manifest)) + write_block_or_manifest( + dest=stdout, src=col_loc, + api_client=api_client, args=args) else: out_fd = os.open(args.destination, open_flags) with os.fdopen(out_fd, 'wb') as out_file: - out_file.write(reader.manifest_text(strip=args.strip_manifest)) + write_block_or_manifest( + dest=out_file, src=col_loc, + api_client=api_client, args=args) except (IOError, OSError) as error: logger.error("can't write to '{}': {}".format(args.destination, error)) return 1 except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error: logger.error("failed to download '{}': {}".format(col_loc, error)) return 1 + except arvados.errors.ArgumentError as error: + if 'Argument to CollectionReader' in str(error): + logger.error("error reading collection: {}".format(error)) + return 1 + else: + raise return 0 + try: + reader = arvados.CollectionReader( + col_loc, api_client=api_client, num_retries=args.retries) + except Exception as error: + logger.error("failed to read collection: {}".format(error)) + return 1 + # Scan the collection. Make an array of (stream, file, local # destination filename) tuples, and add up total size to extract. todo = [] @@ -236,7 +264,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): if args.hash: digestor = hashlib.new(args.hash) try: - with s.open(f.name, 'r') as file_reader: + with s.open(f.name, 'rb') as file_reader: for data in file_reader.readall(): if outfile: outfile.write(data) @@ -271,7 +299,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): def files_in_collection(c): # Sort first by file type, then alphabetically by file path. - for i in sorted(c.keys(), + for i in sorted(list(c.keys()), key=lambda k: ( isinstance(c[k], arvados.collection.Subcollection), k.upper())): @@ -280,3 +308,14 @@ def files_in_collection(c): elif isinstance(c[i], arvados.collection.Subcollection): for s, f in files_in_collection(c[i]): yield (s, f) + +def write_block_or_manifest(dest, src, api_client, args): + if '+A' in src: + # block locator + kc = arvados.keep.KeepClient(api_client=api_client) + dest.write(kc.get(src, num_retries=args.retries)) + else: + # collection UUID or portable data hash + reader = arvados.CollectionReader( + src, api_client=api_client, num_retries=args.retries) + dest.write(reader.manifest_text(strip=args.strip_manifest).encode())