import arvados
import arvados.commands._util as arv_cmd
+import arvados.util as util
from arvados._version import __version__
overwritten. This option causes even devices, sockets, and fifos to be
skipped.
""")
+# Opt-in flag: the manifest is written with its access tokens removed.
+group.add_argument(
+    '--strip-manifest',
+    default=False,
+    action='store_true',
+    help="""
+When getting a collection manifest, strip its access tokens before writing
+it.
+""")
def parse_arguments(arguments, stdout, stderr):
args = parser.parse_args(arguments)
api_client = arvados.api('v1')
+ # Split the locator at its first '/': group 1 is everything before it (the
+ # collection part), group 2 is the optional remainder starting with '/'.
r = re.search(r'^(.*?)(/.*)?$', args.locator)
- collection = r.group(1)
+ col_loc = r.group(1)
get_prefix = r.group(2)
+ # With args.r set and no path after the collection, fall back to os.sep.
if args.r and not get_prefix:
get_prefix = os.sep
+ # Any exception raised while building the reader is logged and reported to
+ # the caller as return value 1.
try:
- reader = arvados.CollectionReader(collection, num_retries=args.retries)
+ reader = arvados.CollectionReader(col_loc, num_retries=args.retries)
except Exception as error:
logger.error("failed to read collection: {}".format(error))
return 1
+ # No path inside the collection was given (get_prefix is empty), so the
+ # user asked to download the collection's manifest itself.
if not get_prefix:
if not args.n:
+ # O_CREAT | O_EXCL: os.open() fails if the destination already exists.
open_flags = os.O_CREAT | os.O_WRONLY
open_flags |= os.O_EXCL
try:
+ # A destination of "-" sends the manifest to the stdout stream.
if args.destination == "-":
- stdout.write(reader.manifest_text())
+ stdout.write(reader.manifest_text(strip=args.strip_manifest))
else:
out_fd = os.open(args.destination, open_flags)
with os.fdopen(out_fd, 'wb') as out_file:
- out_file.write(reader.manifest_text())
+ out_file.write(reader.manifest_text(strip=args.strip_manifest))
+ # Local write failures and API/Keep read failures are logged with different
+ # messages; both yield return value 1.
except (IOError, OSError) as error:
logger.error("can't write to '{}': {}".format(args.destination, error))
return 1
except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
- logger.error("failed to download '{}': {}".format(collection, error))
+ logger.error("failed to download '{}': {}".format(col_loc, error))
return 1
return 0
todo = []
todo_bytes = 0
try:
- for s, f in files_in_collection(reader):
- if get_prefix and get_prefix[-1] == os.sep:
- if not os.path.join(s.stream_name(),
- f.name).startswith('.' + get_prefix):
- continue
- if args.destination == "-":
- dest_path = "-"
- else:
- dest_path = os.path.join(
- args.destination,
- os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
- if (not (args.n or args.f or args.skip_existing) and
- os.path.exists(dest_path)):
- logger.error('Local file %s already exists.' % (dest_path,))
- return 1
- else:
- if os.path.join(s.stream_name(), f.name) != '.' + get_prefix:
- continue
- dest_path = args.destination
- todo += [(s, f, dest_path)]
- todo_bytes += f.size()
- except arvados.errors.NotFoundError as e:
+ # Resolve what was requested: os.sep alone selects the whole collection,
+ # any other prefix is looked up relative to '.'.
+ if get_prefix == os.sep:
+ item = reader
+ else:
+ item = reader.find('.' + get_prefix)
+
+ # A single isinstance() call with a tuple covers both container types.
+ if isinstance(item, (arvados.collection.Subcollection, arvados.collection.CollectionReader)):
+ # If the user asked for a file and we got a subcollection, error out.
+ if get_prefix[-1] != os.sep:
+ logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix))
+ return 1
+ # If the user asked for stdout as the destination, error out.
+ elif args.destination == '-':
+ logger.error("cannot use 'stdout' as destination when downloading multiple files.")
+ return 1
+ # User asked for a subcollection, and that's what was found. Queue every
+ # file in it and add up the total size to download.
+ for s, f in files_in_collection(item):
+ # Drop the leading '.' + get_prefix part of the path inside the
+ # collection before joining it onto the destination.
+ dest_path = os.path.join(
+ args.destination,
+ os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
+ # Refuse to clobber an existing local file unless one of the
+ # n / f / skip_existing options was given.
+ if (not (args.n or args.f or args.skip_existing) and
+ os.path.exists(dest_path)):
+ logger.error('Local file %s already exists.' % (dest_path,))
+ return 1
+ todo.append((s, f, dest_path))
+ todo_bytes += f.size()
+ elif isinstance(item, arvados.arvfile.ArvadosFile):
+ # A single file goes straight to the destination as given.
+ todo.append((item.parent, item, args.destination))
+ todo_bytes += item.size()
+ else:
+ logger.error("'{}' not found.".format('.' + get_prefix))
+ return 1
+ except (IOError, arvados.errors.NotFoundError) as e:
logger.error(e)
return 1
os.unlink(outfile.name)
break
finally:
- if outfile is not stdout:
+ # Compare by identity: neither None nor the stdout stream may be closed here.
+ if outfile is not None and outfile is not stdout:
outfile.close()
if args.progress:
stderr.write('\n')
+ return 0
def files_in_collection(c):
# Sort first by file type, then alphabetically by file path.