4358: fixed: the provenance graph was being generated twice, the second time for...

[arvados.git] / sdk / python / bin / arv-get
diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get

index c1b2bcbaf5a0dcb0ae2914eb7c145a31fab3efaa..272fa84a430f4c8c54f5d8d33a75b13f7525a81e 100755 (executable)
--- a/sdk/python/bin/arv-get
+++ b/sdk/python/bin/arv-get
@@ -9,6 +9,7 @@ import sys
  import logging
  
  import arvados
+import arvados.commands._util as arv_cmd
  
  logger = logging.getLogger('arvados.arv-get')
  
@@ -17,7 +18,8 @@ def abort(msg, code=1):
      exit(code)
  
  parser = argparse.ArgumentParser(
-    description='Copy data from Keep to a local file or pipe.')
+    description='Copy data from Keep to a local file or pipe.',
+    parents=[arv_cmd.retry_opt])
  parser.add_argument('locator', type=str,
                      help="""
  Collection locator, optionally with a file path or prefix.
@@ -31,8 +33,10 @@ group = parser.add_mutually_exclusive_group()
  group.add_argument('--progress', action='store_true',
                     help="""
  Display human-readable progress on stderr (bytes and, if possible,
-percentage of total data size). This is the default behavior when
-stderr is a tty and stdout is not a tty.
+percentage of total data size). This is the default behavior when it
+is not expected to interfere with the output: specifically, stderr is
+a tty _and_ either stdout is not a tty, or output is being written to
+named files rather than stdout.
  """)
  group.add_argument('--no-progress', action='store_true',
                     help="""
@@ -96,12 +100,6 @@ if not args.r and (os.path.isdir(args.destination) or
      logger.debug("Appended source file name to destination directory: %s",
                   args.destination)
  
-# Turn on --progress by default if stderr is a tty and stdout isn't.
-if (not (args.batch_progress or args.no_progress)
-    and os.isatty(sys.stderr.fileno())
-    and not os.isatty(sys.stdout.fileno())):
-    args.progress = True
-
  if args.destination == '-':
      args.destination = '/dev/stdout'
  if args.destination == '/dev/stdout':
@@ -112,41 +110,45 @@ if args.destination == '/dev/stdout':
  else:
      args.destination = args.destination.rstrip(os.sep)
  
+# Turn on --progress by default if stderr is a tty and output is
+# either going to a named file, or going (via stdout) to something
+# that isn't a tty.
+if (not (args.batch_progress or args.no_progress)
+    and sys.stderr.isatty()
+    and (args.destination != '/dev/stdout'
+         or not sys.stdout.isatty())):
+    args.progress = True
+
  
  r = re.search(r'^(.*?)(/.*)?$', args.locator)
  collection = r.group(1)
  get_prefix = r.group(2)
  if args.r and not get_prefix:
      get_prefix = os.sep
+api_client = arvados.api('v1')
+reader = arvados.CollectionReader(collection, num_retries=args.retries)
  
-todo = []
-todo_bytes = 0
  if not get_prefix:
-    try:
-        if not args.n:
-            if not args.f and os.path.exists(args.destination):
-                abort('Local file %s already exists.' % (args.destination,))
-            with open(args.destination, 'wb') as f:
-                try:
-                    c = arvados.api('v1').collections().get(
-                        uuid=collection).execute()
-                    manifest = c['manifest_text']
-                except Exception as e:
-                    logger.warning(
-                        "Collection %s not found. " +
-                        "Trying to fetch directly from Keep (deprecated).",
-                        collection)
-                    manifest = arvados.Keep.get(collection)
-                f.write(manifest)
-        sys.exit(0)
-    except arvados.errors.NotFoundError as e:
-        abort(e)
-
-reader = arvados.CollectionReader(collection)
+    if not args.n:
+        open_flags = os.O_CREAT | os.O_WRONLY
+        if not args.f:
+            open_flags |= os.O_EXCL
+        try:
+            out_fd = os.open(args.destination, open_flags)
+            with os.fdopen(out_fd, 'wb') as out_file:
+                out_file.write(reader.manifest_text())
+        except (IOError, OSError) as error:
+            abort("can't write to '{}': {}".format(args.destination, error))
+        except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
+            abort("failed to download '{}': {}".format(collection, error))
+    sys.exit(0)
+
+reader.normalize()
  
  # Scan the collection. Make an array of (stream, file, local
  # destination filename) tuples, and add up total size to extract.
-
+todo = []
+todo_bytes = 0
  try:
      for s in reader.all_streams():
          for f in s.all_files():
@@ -188,8 +190,8 @@ for s,f,outfilename in todo:
              arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
          try:
              outfile = open(outfilename, 'wb')
-        except Exception as e:
-            abort('Open(%s) failed: %s' % (outfilename, e))
+        except Exception as error:
+            abort('Open(%s) failed: %s' % (outfilename, error))
      if args.hash:
          digestor = hashlib.new(args.hash)
      try: