11642: Increase arv-put retries. Job considered failure if unable to save job log.
author Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 9 Jun 2017 17:27:51 +0000 (13:27 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 9 Jun 2017 18:26:30 +0000 (14:26 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curoverse.com>

sdk/cli/bin/crunch-job

index 55e35b04b9cf933f702df4e109eded633230a15a..35a3b1f23bc1222db58c24aaaf05184868153dbf 100755 (executable)
@@ -1130,10 +1130,10 @@ freeze();
 my $collated_output = save_output_collection();
 Log (undef, "finish");
 
-save_meta();
+my $final_log = save_meta();
 
 my $final_state;
-if ($collated_output && $main::success) {
+if ($collated_output && $final_log && $main::success) {
   $final_state = 'Complete';
 } else {
   $final_state = 'Failed';
@@ -1760,7 +1760,7 @@ sub log_writer_start($)
   $log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
                         'arv-put',
                         '--stream',
-                        '--retries', '3',
+                        '--retries', '6',
                         '--filename', $logfilename,
                         '-');
   $log_pipe_out_buf = "";
@@ -1898,6 +1898,8 @@ sub save_meta
     });
   Log(undef, "log collection is " . $log_coll->{portable_data_hash});
   $Job->update_attributes('log' => $log_coll->{portable_data_hash});
+
+  return $log_coll->{portable_data_hash};
 }