Log(undef, "Job is locked by " . $Job->{'is_locked_by_uuid'});
exit EX_TEMPFAIL;
}
+ if ($Job->{'state'} ne 'Queued') {
+ Log(undef, "Job state is " . $Job->{'state'} . ", but I can only start queued jobs.");
+ exit EX_TEMPFAIL;
+ }
if ($Job->{'success'} ne undef) {
Log(undef, "Job 'success' flag (" . $Job->{'success'} . ") is not null");
exit EX_TEMPFAIL;
Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
exit EX_TEMPFAIL;
}
- $Job->update_attributes('started_at' => scalar gmtime,
- 'running' => 1,
- 'success' => undef,
+ $Job->update_attributes('state' => 'Running',
'tasks_summary' => { 'failed' => 0,
'todo' => 1,
'running' => 0,
freeze();
my $collated_output = &collate_output();
-if ($job_has_uuid) {
- $Job->update_attributes('running' => 0,
- 'success' => $collated_output && $main::success,
- 'finished_at' => scalar gmtime)
-}
-
if (!$collated_output) {
Log(undef, "output undef");
}
Log (undef, "finish");
save_meta();
-exit ($Job->{'success'} ? 1 : 0);
+
+if ($job_has_uuid) {
+ if ($collated_output && $main::success) {
+ $Job->update_attributes('state' => 'Complete')
+ } else {
+ $Job->update_attributes('state' => 'Failed')
+ }
+}
+
+exit ($Job->{'state'} != 'Complete' ? 1 : 0);
my $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
for my $attr ('cancelled_at',
'cancelled_by_user_uuid',
- 'cancelled_by_client_uuid') {
+ 'cancelled_by_client_uuid',
+ 'state') {
$Job->{$attr} = $Job2->{$attr};
}
- if ($Job->{'cancelled_at'}) {
- Log (undef, "Job cancelled at " . $Job->{cancelled_at} .
- " by user " . $Job->{cancelled_by_user_uuid});
+ if ($Job->{'state'} ne "Running") {
+ if ($Job->{'state'} eq "Cancelled") {
+ Log (undef, "Job cancelled at " . $Job->{'cancelled_at'} . " by user " . $Job->{'cancelled_by_user_uuid'});
+ } else {
+ Log (undef, "Job state unexpectedly changed to " . $Job->{'state'});
+ }
$main::success = 0;
$main::please_freeze = 1;
}
Log (undef, "collate");
my ($child_out, $child_in);
- my $pid = open2($child_out, $child_in, 'arv-put', '--raw');
+ my $pid = open2($child_out, $child_in, 'arv-put', '--raw',
+ '--retries', put_retry_count());
my $joboutput;
for (@jobstep)
{
sub cleanup
{
return if !$job_has_uuid;
- $Job->update_attributes('running' => 0,
- 'success' => 0,
- 'finished_at' => scalar gmtime);
+ if ($Job->{'state'} eq 'Cancelled') {
+ $Job->update_attributes('finished_at' => scalar gmtime);
+ } else {
+ $Job->update_attributes('state' => 'Failed');
+ }
}
return if $justcheckpoint; # checkpointing is not relevant post-Warehouse.pm
$local_logfile->flush;
- my $cmd = "arv-put --portable-data-hash --filename ''\Q$keep_logfile\E "
- . quotemeta($local_logfile->filename);
+ my $retry_count = put_retry_count();
+ my $cmd = "arv-put --portable-data-hash --retries $retry_count " .
+ "--filename ''\Q$keep_logfile\E " . quotemeta($local_logfile->filename);
my $loglocator = `$cmd`;
die "system $cmd failed: $?" if $?;
chomp($loglocator);
}
}
+sub put_retry_count {
+ # Calculate a --retries argument for arv-put that will have it try
+ # approximately as long as this Job has been running.
+ my $stoptime = shift || time;
+ my $starttime = $jobstep[0]->{starttime};
+ my $timediff = defined($starttime) ? ($stoptime - $starttime) : 1;
+ my $retries = 0;
+ while ($timediff >= 2) {
+ $retries++;
+ $timediff /= 2;
+ }
+ return ($retries > 3) ? $retries : 3;
+}
+
__DATA__
#!/usr/bin/perl