X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/df9e166a5ffc4aa79658bec1a5d552a3b413f0d8..2e74236fa27822addd856f194befc28382990ce0:/sdk/python/bin/arv-get diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get index 2451416dae..a36914192f 100755 --- a/sdk/python/bin/arv-get +++ b/sdk/python/bin/arv-get @@ -1,224 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 -import argparse -import hashlib -import os -import re -import string import sys -import logging -import arvados -import arvados.commands._util as arv_cmd +from arvados.commands.get import main -logger = logging.getLogger('arvados.arv-get') - -def abort(msg, code=1): - print >>sys.stderr, "arv-get:", msg - exit(code) - -parser = argparse.ArgumentParser( - description='Copy data from Keep to a local file or pipe.', - parents=[arv_cmd.retry_opt]) -parser.add_argument('locator', type=str, - help=""" -Collection locator, optionally with a file path or prefix. -""") -parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout', - help=""" -Local file or directory where the data is to be written. Default: -/dev/stdout. -""") -group = parser.add_mutually_exclusive_group() -group.add_argument('--progress', action='store_true', - help=""" -Display human-readable progress on stderr (bytes and, if possible, -percentage of total data size). This is the default behavior when it -is not expected to interfere with the output: specifically, stderr is -a tty _and_ either stdout is not a tty, or output is being written to -named files rather than stdout. -""") -group.add_argument('--no-progress', action='store_true', - help=""" -Do not display human-readable progress on stderr. -""") -group.add_argument('--batch-progress', action='store_true', - help=""" -Display machine-readable progress on stderr (bytes and, if known, -total data size). -""") -group = parser.add_mutually_exclusive_group() -group.add_argument('--hash', - help=""" -Display the hash of each file as it is read from Keep, using the given -hash algorithm. Supported algorithms include md5, sha1, sha224, -sha256, sha384, and sha512. -""") -group.add_argument('--md5sum', action='store_const', - dest='hash', const='md5', - help=""" -Display the MD5 hash of each file as it is read from Keep. -""") -parser.add_argument('-n', action='store_true', - help=""" -Do not write any data -- just read from Keep, and report md5sums if -requested. -""") -parser.add_argument('-r', action='store_true', - help=""" -Retrieve all files in the specified collection/prefix. This is the -default behavior if the "locator" argument ends with a forward slash. -""") -group = parser.add_mutually_exclusive_group() -group.add_argument('-f', action='store_true', - help=""" -Overwrite existing files while writing. The default behavior is to -refuse to write *anything* if any of the output files already -exist. As a special case, -f is not needed to write to /dev/stdout. -""") -group.add_argument('--skip-existing', action='store_true', - help=""" -Skip files that already exist. The default behavior is to refuse to -write *anything* if any files exist that would have to be -overwritten. This option causes even devices, sockets, and fifos to be -skipped. -""") - -args = parser.parse_args() - -if args.locator[-1] == os.sep: - args.r = True -if (args.r and - not args.n and - not (args.destination and - os.path.isdir(args.destination))): - parser.error('Destination is not a directory.') -if not args.r and (os.path.isdir(args.destination) or - args.destination[-1] == os.path.sep): - args.destination = os.path.join(args.destination, - os.path.basename(args.locator)) - logger.debug("Appended source file name to destination directory: %s", - args.destination) - -if args.destination == '-': - args.destination = '/dev/stdout' -if args.destination == '/dev/stdout': - # Normally you have to use -f to write to a file (or device) that - # already exists, but "-" and "/dev/stdout" are common enough to - # merit a special exception. - args.f = True -else: - args.destination = args.destination.rstrip(os.sep) - -# Turn on --progress by default if stderr is a tty and output is -# either going to a named file, or going (via stdout) to something -# that isn't a tty. -if (not (args.batch_progress or args.no_progress) - and sys.stderr.isatty() - and (args.destination != '/dev/stdout' - or not sys.stdout.isatty())): - args.progress = True - - -r = re.search(r'^(.*?)(/.*)?$', args.locator) -collection = r.group(1) -get_prefix = r.group(2) -if args.r and not get_prefix: - get_prefix = os.sep -api_client = arvados.api('v1') -reader = arvados.CollectionReader(collection, num_retries=args.retries) - -if not get_prefix: - if not args.n: - open_flags = os.O_CREAT | os.O_WRONLY - if not args.f: - open_flags |= os.O_EXCL - try: - out_fd = os.open(args.destination, open_flags) - with os.fdopen(out_fd, 'wb') as out_file: - out_file.write(reader.manifest_text()) - except (IOError, OSError) as error: - abort("can't write to '{}': {}".format(args.destination, error)) - except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error: - abort("failed to download '{}': {}".format(collection, error)) - sys.exit(0) - -reader.normalize() - -# Scan the collection. Make an array of (stream, file, local -# destination filename) tuples, and add up total size to extract. -todo = [] -todo_bytes = 0 -try: - for s in reader.all_streams(): - for f in s.all_files(): - if get_prefix and get_prefix[-1] == os.sep: - if 0 != string.find(os.path.join(s.name(), f.name()), - '.' + get_prefix): - continue - dest_path = os.path.join( - args.destination, - os.path.join(s.name(), f.name())[len(get_prefix)+1:]) - if (not (args.n or args.f or args.skip_existing) and - os.path.exists(dest_path)): - abort('Local file %s already exists.' % (dest_path,)) - else: - if os.path.join(s.name(), f.name()) != '.' + get_prefix: - continue - dest_path = args.destination - todo += [(s, f, dest_path)] - todo_bytes += f.size() -except arvados.errors.NotFoundError as e: - abort(e) - -# Read data, and (if not -n) write to local file(s) or pipe. - -out_bytes = 0 -for s,f,outfilename in todo: - outfile = None - digestor = None - if not args.n: - if args.skip_existing and os.path.exists(outfilename): - logger.debug('Local file %s exists. Skipping.', outfilename) - continue - elif not args.f and (os.path.isfile(outfilename) or - os.path.isdir(outfilename)): - # Good thing we looked again: apparently this file wasn't - # here yet when we checked earlier. - abort('Local file %s already exists.' % (outfilename,)) - if args.r: - arvados.util.mkdir_dash_p(os.path.dirname(outfilename)) - try: - outfile = open(outfilename, 'wb') - except Exception as error: - abort('Open(%s) failed: %s' % (outfilename, error)) - if args.hash: - digestor = hashlib.new(args.hash) - try: - for data in f.readall(): - if outfile: - outfile.write(data) - if digestor: - digestor.update(data) - out_bytes += len(data) - if args.progress: - sys.stderr.write('\r%d MiB / %d MiB %.1f%%' % - (out_bytes >> 20, - todo_bytes >> 20, - (100 - if todo_bytes==0 - else 100.0*out_bytes/todo_bytes))) - elif args.batch_progress: - sys.stderr.write('%s %d read %d total\n' % - (sys.argv[0], os.getpid(), - out_bytes, todo_bytes)) - if digestor: - sys.stderr.write("%s %s/%s\n" - % (digestor.hexdigest(), s.name(), f.name())) - except KeyboardInterrupt: - if outfile and outfilename != '/dev/stdout': - os.unlink(outfilename) - break - -if args.progress: - sys.stderr.write('\n') +sys.exit(main(sys.argv[1:]))