use Fcntl ':flock';
use File::Path qw( make_path );
+use constant EX_TEMPFAIL => 75;
+
$ENV{"TMPDIR"} ||= "/tmp";
unless (defined $ENV{"CRUNCH_TMP"}) {
$ENV{"CRUNCH_TMP"} = $ENV{"TMPDIR"} . "/crunch-job";
{
$Job = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
if (!$force_unlock) {
+ # If some other crunch-job process has grabbed this job (or we see
+ # other evidence that the job is already underway) we exit
+ # EX_TEMPFAIL so crunch-dispatch (our parent process) doesn't
+ # mark the job as failed.
if ($Job->{'is_locked_by_uuid'}) {
- croak("Job is locked: " . $Job->{'is_locked_by_uuid'});
+ Log(undef, "Job is locked by " . $Job->{'is_locked_by_uuid'});
+ exit EX_TEMPFAIL;
}
- if ($Job->{'success'} ne undef) {
- croak("Job 'success' flag (" . $Job->{'success'} . ") is not null");
+ # The message below reports the flag as "not null", so test
+ # definedness directly. "ne undef" is really a string comparison
+ # against "" and emits an uninitialized-value warning under
+ # "use warnings".
+ if (defined $Job->{'success'}) {
+ Log(undef, "Job 'success' flag (" . $Job->{'success'} . ") is not null");
+ exit EX_TEMPFAIL;
}
if ($Job->{'running'}) {
- croak("Job 'running' flag is already set");
+ Log(undef, "Job 'running' flag is already set");
+ exit EX_TEMPFAIL;
}
if ($Job->{'started_at'}) {
- croak("Job 'started_at' time is already set (" . $Job->{'started_at'} . ")");
+ Log(undef, "Job 'started_at' time is already set (" . $Job->{'started_at'} . ")");
+ exit EX_TEMPFAIL;
}
}
}
# Claim this job, and make sure nobody else does
+# The UUIDs are compared as strings with "eq": numeric "==" would
+# numify both sides (only leading digits count, so usually 0 == 0)
+# and report a match even when another process holds the lock.
unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
- $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
- croak("Error while updating / locking job");
+ $Job->{'is_locked_by_uuid'} eq $User->{'uuid'}) {
+ Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
+ exit EX_TEMPFAIL;
}
$Job->update_attributes('started_at' => scalar gmtime,
'running' => 1,
if ($cleanpid == 0)
{
srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then sudo /bin/umount $JOB_WORK/* 2>/dev/null; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_TMP/opt $CRUNCH_TMP/src*']);
+ ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_TMP/opt $CRUNCH_TMP/src*']);
exit (1);
}
while (1)
my $build_script_to_send = "";
my $command =
"if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
- ."mkdir -p $ENV{JOB_WORK} $ENV{CRUNCH_TMP} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT}"
+ ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
."&& cd $ENV{CRUNCH_TMP} ";
if ($build_script)
{
while (my ($env_key, $env_val) = each %ENV)
{
if ($env_key =~ /^(ARVADOS|JOB|TASK)_/) {
- if ($env_key eq "TASK_WORK") {
- $command .= "--env=\QTASK_WORK=/tmp/crunch-job\E ";
- }
- elsif ($env_key eq "TASK_KEEPMOUNT") {
+ if ($env_key eq "TASK_KEEPMOUNT") {
$command .= "--env=\QTASK_KEEPMOUNT=/keep\E ";
}
else {
my @execargs = ('bash', '-c', $command);
srun (\@srunargs, \@execargs, undef, $build_script_to_send);
- exit (111);
+ # exec() failed, we assume nothing happened.
+ Log(undef, "srun() failed on build script");
+ die;
}
close("writer");
if (!defined $childpid)
Log (undef, "finish");
save_meta();
-exit 0;
+# Report the job outcome through the process exit status using the
+# usual convention: 0 when the job succeeded, non-zero otherwise.
+exit ($Job->{'success'} ? 0 : 1);
my $joboutput;
for (@jobstep)
{
- next if (!exists $_->{'arvados_task'}->{output} ||
- !$_->{'arvados_task'}->{'success'} ||
- $_->{'exitcode'} != 0);
+ next if (!exists $_->{'arvados_task'}->{'output'} ||
+ !$_->{'arvados_task'}->{'success'});
my $output = $_->{'arvados_task'}->{output};
if ($output !~ /^[0-9a-f]{32}(\+\S+)*$/)
{
# checkout-and-build
use Fcntl ':flock';
+use File::Path qw( make_path );
my $destdir = $ENV{"CRUNCH_SRC"};
my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
my $repo = $ENV{"CRUNCH_SRC_URL"};
+my $task_work = $ENV{"TASK_WORK"};
+
+# make_path returns the number of directories it actually created,
+# which is 0 when $task_work already exists, so its return value
+# cannot be used as a success flag. Create the directory, then
+# verify that it is really there.
+make_path($task_work);
+die "Failed to create temporary working directory ($task_work): $!" unless -d $task_work;
open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
flock L, LOCK_EX;