11507: Cleanup

[arvados.git] / sdk / python / arvados / commands / get.py
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py

index bf084419ef09e82b616ae2caef455fd6d6788ed3..3bf929584ea4e22968b0e29a07044c7e1e95a744 100755 (executable)
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -10,6 +10,7 @@ import logging
  
  import arvados
  import arvados.commands._util as arv_cmd
+import arvados.util as util
  
  from arvados._version import __version__
  
@@ -84,6 +85,11 @@ write *anything* if any files exist that would have to be
  overwritten. This option causes even devices, sockets, and fifos to be
  skipped.
  """)
+group.add_argument('--strip-manifest', action='store_true', default=False,
+                   help="""
+When getting a collection manifest, strip its access tokens before writing
+it.
+""")
  
  def parse_arguments(arguments, stdout, stderr):
      args = parser.parse_args(arguments)
@@ -131,16 +137,17 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
          api_client = arvados.api('v1')
  
      r = re.search(r'^(.*?)(/.*)?$', args.locator)
-    collection = r.group(1)
+    col_loc = r.group(1)
      get_prefix = r.group(2)
      if args.r and not get_prefix:
          get_prefix = os.sep
      try:
-        reader = arvados.CollectionReader(collection, num_retries=args.retries)
+        reader = arvados.CollectionReader(col_loc, num_retries=args.retries)
      except Exception as error:
          logger.error("failed to read collection: {}".format(error))
          return 1
  
+    # User asked to download the collection's manifest
      if not get_prefix:
          if not args.n:
              open_flags = os.O_CREAT | os.O_WRONLY
@@ -148,16 +155,16 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                  open_flags |= os.O_EXCL
              try:
                  if args.destination == "-":
-                    stdout.write(reader.manifest_text())
+                    stdout.write(reader.manifest_text(strip=args.strip_manifest))
                  else:
                      out_fd = os.open(args.destination, open_flags)
                      with os.fdopen(out_fd, 'wb') as out_file:
-                        out_file.write(reader.manifest_text())
+                        out_file.write(reader.manifest_text(strip=args.strip_manifest))
              except (IOError, OSError) as error:
                  logger.error("can't write to '{}': {}".format(args.destination, error))
                  return 1
              except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
-                logger.error("failed to download '{}': {}".format(collection, error))
+                logger.error("failed to download '{}': {}".format(col_loc, error))
                  return 1
          return 0
  
@@ -166,28 +173,39 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
      todo = []
      todo_bytes = 0
      try:
-        for s, f in files_in_collection(reader):
-            if get_prefix and get_prefix[-1] == os.sep:
-                if not os.path.join(s.stream_name(),
-                                    f.name).startswith('.' + get_prefix):
-                    continue
-                if args.destination == "-":
-                    dest_path = "-"
-                else:
-                    dest_path = os.path.join(
-                        args.destination,
-                        os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
-                    if (not (args.n or args.f or args.skip_existing) and
-                        os.path.exists(dest_path)):
-                        logger.error('Local file %s already exists.' % (dest_path,))
-                        return 1
-            else:
-                if os.path.join(s.stream_name(), f.name) != '.' + get_prefix:
-                    continue
-                dest_path = args.destination
-            todo += [(s, f, dest_path)]
-            todo_bytes += f.size()
-    except arvados.errors.NotFoundError as e:
+        if get_prefix == os.sep:
+            item = reader
+        else:
+            item = reader.find('.' + get_prefix)
+
+        if isinstance(item, arvados.collection.Subcollection) or isinstance(item, arvados.collection.CollectionReader):
+            # If the user asked for a file and we got a subcollection, error out.
+            if get_prefix[-1] != os.sep:
+                logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix))
+                return 1
+            # If the user asked for stdout as the destination, error out.
+            elif args.destination == '-':
+                logger.error("cannot use 'stdout' as destination when downloading multiple files.")
+                return 1
+            # User asked for a subcollection, and that's what was found. Add up total size
+            # to download.
+            for s, f in files_in_collection(item):
+                dest_path = os.path.join(
+                    args.destination,
+                    os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
+                if (not (args.n or args.f or args.skip_existing) and
+                    os.path.exists(dest_path)):
+                    logger.error('Local file %s already exists.' % (dest_path,))
+                    return 1
+                todo += [(s, f, dest_path)]
+                todo_bytes += f.size()
+        elif isinstance(item, arvados.arvfile.ArvadosFile):
+            todo += [(item.parent, item, args.destination)]
+            todo_bytes += item.size()
+        else:
+            logger.error("'{}' not found.".format('.' + get_prefix))
+            return 1
+    except (IOError, arvados.errors.NotFoundError) as e:
          logger.error(e)
          return 1
  
@@ -244,11 +262,12 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                  os.unlink(outfile.name)
              break
          finally:
-            if outfile is not stdout:
+            if outfile != None and outfile != stdout:
                  outfile.close()
  
      if args.progress:
          stderr.write('\n')
+    return 0
  
  def files_in_collection(c):
      # Sort first by file type, then alphabetically by file path.