X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aa2adbcaa06c4f5dc7a6e54f3d9a5148b20fbbb1..4fd735476b69fd5e5d5c3162a752a8a5022d8d18:/sdk/python/bin/arv-get diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get index 7f02cf73df..60d4bec3b9 100755 --- a/sdk/python/bin/arv-get +++ b/sdk/python/bin/arv-get @@ -9,6 +9,7 @@ import sys import logging import arvados +import arvados.commands._util as arv_cmd logger = logging.getLogger('arvados.arv-get') @@ -17,15 +18,15 @@ def abort(msg, code=1): exit(code) parser = argparse.ArgumentParser( - description='Copy data from Keep to a local file or pipe.') + description='Copy data from Keep to a local file or pipe.', + parents=[arv_cmd.retry_opt]) parser.add_argument('locator', type=str, help=""" Collection locator, optionally with a file path or prefix. """) -parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout', +parser.add_argument('destination', type=str, nargs='?', default='-', help=""" -Local file or directory where the data is to be written. Default: -/dev/stdout. +Local file or directory where the data is to be written. Default: stdout. """) group = parser.add_mutually_exclusive_group() group.add_argument('--progress', action='store_true', @@ -72,7 +73,7 @@ group.add_argument('-f', action='store_true', help=""" Overwrite existing files while writing. The default behavior is to refuse to write *anything* if any of the output files already -exist. As a special case, -f is not needed to write to /dev/stdout. +exist. As a special case, -f is not needed to write to stdout. """) group.add_argument('--skip-existing', action='store_true', help=""" @@ -98,9 +99,10 @@ if not args.r and (os.path.isdir(args.destination) or logger.debug("Appended source file name to destination directory: %s", args.destination) -if args.destination == '-': - args.destination = '/dev/stdout' if args.destination == '/dev/stdout': + args.destination = "-" + +if args.destination == '-': # Normally you have to use -f to write to a file (or device) that # already exists, but "-" and "/dev/stdout" are common enough to # merit a special exception. @@ -113,7 +115,7 @@ else: # that isn't a tty. if (not (args.batch_progress or args.no_progress) and sys.stderr.isatty() - and (args.destination != '/dev/stdout' + and (args.destination != '-' or not sys.stdout.isatty())): args.progress = True @@ -123,35 +125,33 @@ collection = r.group(1) get_prefix = r.group(2) if args.r and not get_prefix: get_prefix = os.sep +api_client = arvados.api('v1') +reader = arvados.CollectionReader(collection, num_retries=args.retries) -todo = [] -todo_bytes = 0 if not get_prefix: - try: - if not args.n: - if not args.f and os.path.exists(args.destination): - abort('Local file %s already exists.' % (args.destination,)) - with open(args.destination, 'wb') as f: - try: - c = arvados.api('v1').collections().get( - uuid=collection).execute() - manifest = c['manifest_text'] - except Exception as e: - logger.warning( - "Collection %s not found. " + - "Trying to fetch directly from Keep (deprecated).", - collection) - manifest = arvados.Keep.get(collection) - f.write(manifest) - sys.exit(0) - except arvados.errors.NotFoundError as e: - abort(e) - -reader = arvados.CollectionReader(collection) + if not args.n: + open_flags = os.O_CREAT | os.O_WRONLY + if not args.f: + open_flags |= os.O_EXCL + try: + if args.destination == "-": + sys.stdout.write(reader.manifest_text()) + else: + out_fd = os.open(args.destination, open_flags) + with os.fdopen(out_fd, 'wb') as out_file: + out_file.write(reader.manifest_text()) + except (IOError, OSError) as error: + abort("can't write to '{}': {}".format(args.destination, error)) + except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error: + abort("failed to download '{}': {}".format(collection, error)) + sys.exit(0) + +reader.normalize() # Scan the collection. Make an array of (stream, file, local # destination filename) tuples, and add up total size to extract. - +todo = [] +todo_bytes = 0 try: for s in reader.all_streams(): for f in s.all_files(): @@ -159,12 +159,15 @@ try: if 0 != string.find(os.path.join(s.name(), f.name()), '.' + get_prefix): continue - dest_path = os.path.join( - args.destination, - os.path.join(s.name(), f.name())[len(get_prefix)+1:]) - if (not (args.n or args.f or args.skip_existing) and - os.path.exists(dest_path)): - abort('Local file %s already exists.' % (dest_path,)) + if args.destination == "-": + dest_path = "-" + else: + dest_path = os.path.join( + args.destination, + os.path.join(s.name(), f.name())[len(get_prefix)+1:]) + if (not (args.n or args.f or args.skip_existing) and + os.path.exists(dest_path)): + abort('Local file %s already exists.' % (dest_path,)) else: if os.path.join(s.name(), f.name()) != '.' + get_prefix: continue @@ -181,20 +184,23 @@ for s,f,outfilename in todo: outfile = None digestor = None if not args.n: - if args.skip_existing and os.path.exists(outfilename): - logger.debug('Local file %s exists. Skipping.', outfilename) - continue - elif not args.f and (os.path.isfile(outfilename) or - os.path.isdir(outfilename)): - # Good thing we looked again: apparently this file wasn't - # here yet when we checked earlier. - abort('Local file %s already exists.' % (outfilename,)) - if args.r: - arvados.util.mkdir_dash_p(os.path.dirname(outfilename)) - try: - outfile = open(outfilename, 'wb') - except Exception as e: - abort('Open(%s) failed: %s' % (outfilename, e)) + if outfilename == "-": + outfile = sys.stdout + else: + if args.skip_existing and os.path.exists(outfilename): + logger.debug('Local file %s exists. Skipping.', outfilename) + continue + elif not args.f and (os.path.isfile(outfilename) or + os.path.isdir(outfilename)): + # Good thing we looked again: apparently this file wasn't + # here yet when we checked earlier. + abort('Local file %s already exists.' % (outfilename,)) + if args.r: + arvados.util.mkdir_dash_p(os.path.dirname(outfilename)) + try: + outfile = open(outfilename, 'wb') + except Exception as error: + abort('Open(%s) failed: %s' % (outfilename, error)) if args.hash: digestor = hashlib.new(args.hash) try: @@ -219,8 +225,8 @@ for s,f,outfilename in todo: sys.stderr.write("%s %s/%s\n" % (digestor.hexdigest(), s.name(), f.name())) except KeyboardInterrupt: - if outfile and outfile != '/dev/stdout': - os.unlink(outfilename) + if outfile and (outfile.fileno() > 2) and not outfile.closed: + os.unlink(outfile.name) break if args.progress: