2752: arv-put doesn't update the resume cache when aborted.
authorBrett Smith <brett@curoverse.com>
Fri, 30 May 2014 14:21:59 +0000 (10:21 -0400)
committerBrett Smith <brett@curoverse.com>
Fri, 30 May 2014 14:40:11 +0000 (10:40 -0400)
It's appealing to try to save the latest state when we have the
opportunity, but the problem is that we can't be sure that our
CollectionWriter is in a consistent state when we get a signal or
exception.  The previous code could potentially write inconsistent
state, which would appear to resume successfully but PUT different
data.  Instead rely exclusively on cache updates that are done from a
known consistent state.

sdk/python/arvados/commands/put.py

index d9e401dcbbdfedec60e573346859cb829cc51eea..7f4d4308c1ae551b041ee57f8f9e1f41bdfeb285 100644 (file)
@@ -325,6 +325,9 @@ def progress_writer(progress_func, outfile=sys.stderr):
         outfile.write(progress_func(bytes_written, bytes_expected))
     return write_progress
 
+def exit_signal_handler(sigcode, frame):
+    sys.exit(-sigcode)
+
 def main(arguments=None):
     ResumeCache.setup_user_cache()
     args = parse_arguments(arguments)
@@ -347,12 +350,9 @@ def main(arguments=None):
     writer = ArvPutCollectionWriter.from_cache(
         resume_cache, reporter, expected_bytes_for(args.paths))
 
-    def signal_handler(sigcode, frame):
-        writer.cache_state()
-        sys.exit(-sigcode)
     # Install our signal handler for each code in CAUGHT_SIGNALS, and save
     # the originals.
-    orig_signal_handlers = {sigcode: signal.signal(sigcode, signal_handler)
+    orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
                             for sigcode in CAUGHT_SIGNALS}
 
     if writer.bytes_written > 0:  # We're resuming a previous upload.
@@ -361,19 +361,15 @@ def main(arguments=None):
                 "         Use the --no-resume option to start over."])
         writer.report_progress()
 
-    try:
-        writer.do_queued_work()  # Do work resumed from cache.
-        for path in args.paths:  # Copy file data to Keep.
-            if os.path.isdir(path):
-                writer.write_directory_tree(
-                    path, max_manifest_depth=args.max_manifest_depth)
-            else:
-                writer.start_new_stream()
-                writer.write_file(path, args.filename or os.path.basename(path))
-        writer.finish_current_stream()
-    except Exception:
-        writer.cache_state()
-        raise
+    writer.do_queued_work()  # Do work resumed from cache.
+    for path in args.paths:  # Copy file data to Keep.
+        if os.path.isdir(path):
+            writer.write_directory_tree(
+                path, max_manifest_depth=args.max_manifest_depth)
+        else:
+            writer.start_new_stream()
+            writer.write_file(path, args.filename or os.path.basename(path))
+    writer.finish_current_stream()
 
     if args.progress:  # Print newline to split stderr from stdout for humans.
         print >>sys.stderr