16046: Replace slash with space for "repo/image" collection name

[arvados.git] / sdk / python / arvados / commands / get.py
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py

index 67f38c4cbd635c64d4a0c7821ae50310b307de0c..1e527149168daa8d1a892abf0638517936891d79 100755 (executable)
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -1,4 +1,7 @@
  #!/usr/bin/env python
  #!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
  
  import argparse
  import hashlib
  
  import argparse
  import hashlib
@@ -10,6 +13,7 @@ import logging
  
  import arvados
  import arvados.commands._util as arv_cmd
  
  import arvados
  import arvados.commands._util as arv_cmd
+import arvados.util as util
  
  from arvados._version import __version__
  
  
  from arvados._version import __version__
  
@@ -77,6 +81,10 @@ Overwrite existing files while writing. The default behavior is to
  refuse to write *anything* if any of the output files already
  exist. As a special case, -f is not needed to write to stdout.
  """)
  refuse to write *anything* if any of the output files already
  exist. As a special case, -f is not needed to write to stdout.
  """)
+group.add_argument('-v', action='count', default=0,
+                    help="""
+Once for verbose mode, twice for debug mode.
+""")
  group.add_argument('--skip-existing', action='store_true',
                     help="""
  Skip files that already exist. The default behavior is to refuse to
  group.add_argument('--skip-existing', action='store_true',
                     help="""
  Skip files that already exist. The default behavior is to refuse to
@@ -84,6 +92,11 @@ write *anything* if any files exist that would have to be
  overwritten. This option causes even devices, sockets, and fifos to be
  skipped.
  """)
  overwritten. This option causes even devices, sockets, and fifos to be
  skipped.
  """)
+group.add_argument('--strip-manifest', action='store_true', default=False,
+                   help="""
+When getting a collection manifest, strip its access tokens before writing
+it.
+""")
  
  def parse_arguments(arguments, stdout, stderr):
      args = parser.parse_args(arguments)
  
  def parse_arguments(arguments, stdout, stderr):
      args = parser.parse_args(arguments)
@@ -125,18 +138,27 @@ def parse_arguments(arguments, stdout, stderr):
  
  def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
      global api_client
  
  def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
      global api_client
-    
+
+    if stdout is sys.stdout and hasattr(stdout, 'buffer'):
+        # in Python 3, write to stdout as binary
+        stdout = stdout.buffer
+
      args = parse_arguments(arguments, stdout, stderr)
      args = parse_arguments(arguments, stdout, stderr)
+    logger.setLevel(logging.WARNING - 10 * args.v)
+
+    request_id = arvados.util.new_request_id()
+    logger.info('X-Request-Id: '+request_id)
+
      if api_client is None:
      if api_client is None:
-        api_client = arvados.api('v1')
+        api_client = arvados.api('v1', request_id=request_id)
  
      r = re.search(r'^(.*?)(/.*)?$', args.locator)
  
      r = re.search(r'^(.*?)(/.*)?$', args.locator)
-    collection = r.group(1)
+    col_loc = r.group(1)
      get_prefix = r.group(2)
      if args.r and not get_prefix:
          get_prefix = os.sep
      get_prefix = r.group(2)
      if args.r and not get_prefix:
          get_prefix = os.sep
-    reader = arvados.CollectionReader(collection, num_retries=args.retries)
  
  
+    # User asked to download the collection's manifest
      if not get_prefix:
          if not args.n:
              open_flags = os.O_CREAT | os.O_WRONLY
      if not get_prefix:
          if not args.n:
              open_flags = os.O_CREAT | os.O_WRONLY
@@ -144,46 +166,74 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                  open_flags |= os.O_EXCL
              try:
                  if args.destination == "-":
                  open_flags |= os.O_EXCL
              try:
                  if args.destination == "-":
-                    stdout.write(reader.manifest_text())
+                    write_block_or_manifest(
+                        dest=stdout, src=col_loc,
+                        api_client=api_client, args=args)
                  else:
                      out_fd = os.open(args.destination, open_flags)
                      with os.fdopen(out_fd, 'wb') as out_file:
                  else:
                      out_fd = os.open(args.destination, open_flags)
                      with os.fdopen(out_fd, 'wb') as out_file:
-                        out_file.write(reader.manifest_text())
+                        write_block_or_manifest(
+                            dest=out_file, src=col_loc,
+                            api_client=api_client, args=args)
              except (IOError, OSError) as error:
                  logger.error("can't write to '{}': {}".format(args.destination, error))
                  return 1
              except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
              except (IOError, OSError) as error:
                  logger.error("can't write to '{}': {}".format(args.destination, error))
                  return 1
              except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
-                logger.error("failed to download '{}': {}".format(collection, error))
+                logger.error("failed to download '{}': {}".format(col_loc, error))
                  return 1
                  return 1
+            except arvados.errors.ArgumentError as error:
+                if 'Argument to CollectionReader' in str(error):
+                    logger.error("error reading collection: {}".format(error))
+                    return 1
+                else:
+                    raise
          return 0
  
          return 0
  
+    try:
+        reader = arvados.CollectionReader(
+            col_loc, api_client=api_client, num_retries=args.retries)
+    except Exception as error:
+        logger.error("failed to read collection: {}".format(error))
+        return 1
+
      # Scan the collection. Make an array of (stream, file, local
      # destination filename) tuples, and add up total size to extract.
      todo = []
      todo_bytes = 0
      try:
      # Scan the collection. Make an array of (stream, file, local
      # destination filename) tuples, and add up total size to extract.
      todo = []
      todo_bytes = 0
      try:
-        for s, f in files_in_collection(reader):
-            if get_prefix and get_prefix[-1] == os.sep:
-                if 0 != string.find(os.path.join(s.stream_name(), f.name),
-                                    '.' + get_prefix):
-                    continue
-                if args.destination == "-":
-                    dest_path = "-"
-                else:
-                    dest_path = os.path.join(
-                        args.destination,
-                        os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
-                    if (not (args.n or args.f or args.skip_existing) and
-                        os.path.exists(dest_path)):
-                        logger.error('Local file %s already exists.' % (dest_path,))
-                        return 1
-            else:
-                if os.path.join(s.stream_name(), f.name) != '.' + get_prefix:
-                    continue
-                dest_path = args.destination
-            todo += [(s, f, dest_path)]
-            todo_bytes += f.size()
-    except arvados.errors.NotFoundError as e:
+        if get_prefix == os.sep:
+            item = reader
+        else:
+            item = reader.find('.' + get_prefix)
+
+        if isinstance(item, arvados.collection.Subcollection) or isinstance(item, arvados.collection.CollectionReader):
+            # If the user asked for a file and we got a subcollection, error out.
+            if get_prefix[-1] != os.sep:
+                logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix))
+                return 1
+            # If the user asked stdout as a destination, error out.
+            elif args.destination == '-':
+                logger.error("cannot use 'stdout' as destination when downloading multiple files.")
+                return 1
+            # User asked for a subcollection, and that's what was found. Add up total size
+            # to download.
+            for s, f in files_in_collection(item):
+                dest_path = os.path.join(
+                    args.destination,
+                    os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
+                if (not (args.n or args.f or args.skip_existing) and
+                    os.path.exists(dest_path)):
+                    logger.error('Local file %s already exists.' % (dest_path,))
+                    return 1
+                todo += [(s, f, dest_path)]
+                todo_bytes += f.size()
+        elif isinstance(item, arvados.arvfile.ArvadosFile):
+            todo += [(item.parent, item, args.destination)]
+            todo_bytes += item.size()
+        else:
+            logger.error("'{}' not found.".format('.' + get_prefix))
+            return 1
+    except (IOError, arvados.errors.NotFoundError) as e:
          logger.error(e)
          return 1
  
          logger.error(e)
          return 1
  
@@ -214,7 +264,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
          if args.hash:
              digestor = hashlib.new(args.hash)
          try:
          if args.hash:
              digestor = hashlib.new(args.hash)
          try:
-            with s.open(f.name, 'r') as file_reader:
+            with s.open(f.name, 'rb') as file_reader:
                  for data in file_reader.readall():
                      if outfile:
                          outfile.write(data)
                  for data in file_reader.readall():
                      if outfile:
                          outfile.write(data)
@@ -239,13 +289,17 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
              if outfile and (outfile.fileno() > 2) and not outfile.closed:
                  os.unlink(outfile.name)
              break
              if outfile and (outfile.fileno() > 2) and not outfile.closed:
                  os.unlink(outfile.name)
              break
+        finally:
+            if outfile != None and outfile != stdout:
+                outfile.close()
  
      if args.progress:
          stderr.write('\n')
  
      if args.progress:
          stderr.write('\n')
+    return 0
  
  def files_in_collection(c):
      # Sort first by file type, then alphabetically by file path.
  
  def files_in_collection(c):
      # Sort first by file type, then alphabetically by file path.
-    for i in sorted(c.keys(),
+    for i in sorted(list(c.keys()),
                      key=lambda k: (
                          isinstance(c[k], arvados.collection.Subcollection),
                          k.upper())):
                      key=lambda k: (
                          isinstance(c[k], arvados.collection.Subcollection),
                          k.upper())):
@@ -254,3 +308,14 @@ def files_in_collection(c):
          elif isinstance(c[i], arvados.collection.Subcollection):
              for s, f in files_in_collection(c[i]):
                  yield (s, f)
          elif isinstance(c[i], arvados.collection.Subcollection):
              for s, f in files_in_collection(c[i]):
                  yield (s, f)
+
+def write_block_or_manifest(dest, src, api_client, args):
+    """Write either a Keep block's data or a collection's manifest to dest.
+
+    dest -- writable binary file-like object; main() passes stdout or a
+            file opened in 'wb' mode.
+    src -- a block locator, or a collection UUID / portable data hash.
+    api_client -- API client handed to KeepClient or CollectionReader.
+    args -- parsed command-line arguments; args.retries and
+            args.strip_manifest are read here.
+    """
+    if '+A' in src:
+        # block locator
+        # NOTE(review): '+A' is presumably the permission-signature hint
+        # that only shows up in block locators -- confirm.
+        kc = arvados.keep.KeepClient(api_client=api_client)
+        dest.write(kc.get(src, num_retries=args.retries))
+    else:
+        # collection UUID or portable data hash
+        reader = arvados.CollectionReader(
+            src, api_client=api_client, num_retries=args.retries)
+        # manifest_text() is encoded to bytes before being written to dest.
+        dest.write(reader.manifest_text(strip=args.strip_manifest).encode())