X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/988e9052575bc93db2909f3b9ef576f1043eddcb..0ecea550fde014578e71004360b700cdfeae4909:/sdk/python/arvados/commands/get.py diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py index bf084419ef..3bf929584e 100755 --- a/sdk/python/arvados/commands/get.py +++ b/sdk/python/arvados/commands/get.py @@ -10,6 +10,7 @@ import logging import arvados import arvados.commands._util as arv_cmd +import arvados.util as util from arvados._version import __version__ @@ -84,6 +85,11 @@ write *anything* if any files exist that would have to be overwritten. This option causes even devices, sockets, and fifos to be skipped. """) +group.add_argument('--strip-manifest', action='store_true', default=False, + help=""" +When getting a collection manifest, strip its access tokens before writing +it. +""") def parse_arguments(arguments, stdout, stderr): args = parser.parse_args(arguments) @@ -131,16 +137,17 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): api_client = arvados.api('v1') r = re.search(r'^(.*?)(/.*)?$', args.locator) - collection = r.group(1) + col_loc = r.group(1) get_prefix = r.group(2) if args.r and not get_prefix: get_prefix = os.sep try: - reader = arvados.CollectionReader(collection, num_retries=args.retries) + reader = arvados.CollectionReader(col_loc, num_retries=args.retries) except Exception as error: logger.error("failed to read collection: {}".format(error)) return 1 + # User asked to download the collection's manifest if not get_prefix: if not args.n: open_flags = os.O_CREAT | os.O_WRONLY @@ -148,16 +155,16 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): open_flags |= os.O_EXCL try: if args.destination == "-": - stdout.write(reader.manifest_text()) + stdout.write(reader.manifest_text(strip=args.strip_manifest)) else: out_fd = os.open(args.destination, open_flags) with os.fdopen(out_fd, 'wb') as out_file: - out_file.write(reader.manifest_text()) + out_file.write(reader.manifest_text(strip=args.strip_manifest)) except (IOError, OSError) as error: logger.error("can't write to '{}': {}".format(args.destination, error)) return 1 except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error: - logger.error("failed to download '{}': {}".format(collection, error)) + logger.error("failed to download '{}': {}".format(col_loc, error)) return 1 return 0 @@ -166,28 +173,39 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): todo = [] todo_bytes = 0 try: - for s, f in files_in_collection(reader): - if get_prefix and get_prefix[-1] == os.sep: - if not os.path.join(s.stream_name(), - f.name).startswith('.' + get_prefix): - continue - if args.destination == "-": - dest_path = "-" - else: - dest_path = os.path.join( - args.destination, - os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:]) - if (not (args.n or args.f or args.skip_existing) and - os.path.exists(dest_path)): - logger.error('Local file %s already exists.' % (dest_path,)) - return 1 - else: - if os.path.join(s.stream_name(), f.name) != '.' + get_prefix: - continue - dest_path = args.destination - todo += [(s, f, dest_path)] - todo_bytes += f.size() - except arvados.errors.NotFoundError as e: + if get_prefix == os.sep: + item = reader + else: + item = reader.find('.' + get_prefix) + + if isinstance(item, arvados.collection.Subcollection) or isinstance(item, arvados.collection.CollectionReader): + # If the user asked for a file and we got a subcollection, error out. + if get_prefix[-1] != os.sep: + logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix)) + return 1 + # If the user asked stdout as a destination, error out. + elif args.destination == '-': + logger.error("cannot use 'stdout' as destination when downloading multiple files.") + return 1 + # User asked for a subcollection, and that's what was found. Add up total size + # to download. + for s, f in files_in_collection(item): + dest_path = os.path.join( + args.destination, + os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:]) + if (not (args.n or args.f or args.skip_existing) and + os.path.exists(dest_path)): + logger.error('Local file %s already exists.' % (dest_path,)) + return 1 + todo += [(s, f, dest_path)] + todo_bytes += f.size() + elif isinstance(item, arvados.arvfile.ArvadosFile): + todo += [(item.parent, item, args.destination)] + todo_bytes += item.size() + else: + logger.error("'{}' not found.".format('.' + get_prefix)) + return 1 + except (IOError, arvados.errors.NotFoundError) as e: logger.error(e) return 1 @@ -244,11 +262,12 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): os.unlink(outfile.name) break finally: - if outfile is not stdout: + if outfile != None and outfile != stdout: outfile.close() if args.progress: stderr.write('\n') + return 0 def files_in_collection(c): # Sort first by file type, then alphabetically by file path.