-#!/usr/bin/perl
+#!/usr/bin/env perl
# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*-
=head1 NAME
my $job_api_token;
my $no_clear_tmp;
my $resume_stash;
-my $docker_bin = "/usr/bin/docker.io";
+my $docker_bin = "docker.io";
GetOptions('force-unlock' => \$force_unlock,
'git-dir=s' => \$git_dir,
'job=s' => \$jobspec,
}
else
{
- $Job = JSON::decode_json($jobspec);
- $local_job = 1;
+ $local_job = JSON::decode_json($jobspec);
}
# at least able to run basic commands: they aren't down or severely
# misconfigured.
#
# The default probe command is `true`.  When the job description names a
# Docker image (docker_image_locator), the probe is `$docker_bin ps -q`
# instead.  The description is read from $Job, or from $local_job when
# $Job is false (presumably because a locally specified job has not been
# created through the API yet at this point -- TODO confirm).
my $cmd = ['true'];
-if ($Job->{docker_image_locator}) {
+if (($Job || $local_job)->{docker_image_locator}) {
$cmd = [$docker_bin, 'ps', '-q'];
}
# Log the exact probe command before it is used.
Log(undef, "Sanity check is `@$cmd`");
{
# Job described by $local_job: validate it, mark it as locked and running,
# then create it through the API.  The API response becomes $Job.
if (!$resume_stash)
{
# Unless resuming from a stash, every one of these fields must be set.
# map is used only for its side effect here: croak on the first missing
# field.
- map { croak ("No $_ specified") unless $Job->{$_} }
+ map { croak ("No $_ specified") unless $local_job->{$_} }
qw(script script_version script_parameters);
}
# Record the current user as lock holder, the start time, and the state
# before submitting the job.
- $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
- $Job->{'started_at'} = gmtime;
- $Job->{'state'} = 'Running';
+ $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
+ $local_job->{'started_at'} = gmtime;
+ $local_job->{'state'} = 'Running';
- $Job = api_call("jobs/create", job => $Job);
+ $Job = api_call("jobs/create", job => $local_job);
}
$job_id = $Job->{'uuid'};
# Fork a child that cleans up the work directories; the parent keeps the
# child's pid in $cleanpid.
my $cleanpid = fork();
# fork() returns undef on failure, and undef == 0 is true, so without this
# check a failed fork would send the parent itself into the child branch
# below (running the cleanup command and then exiting).
+croak ("Failed to fork cleanup process: $!") unless defined $cleanpid;
if ($cleanpid == 0)
{
# Child process: run the cleanup shell command through srun with
# --nodelist=$nodelist.  In the new command, awk keeps the mount points
# for which index($3, "$CRUNCH_TMP") == 1, i.e. paths that begin with
# $CRUNCH_TMP, and `fusermount -u -z` lazily unmounts each of them.
# bash runs with -e and -o pipefail, so a failure anywhere in the
# mount | awk | xargs pipeline stops the script before `rm -rf` runs.
- # Find FUSE mounts that look like Keep mounts (the mount path has the
- # word "keep") and unmount them. Then clean up work directories.
- # TODO: When #5036 is done and widely deployed, we can get rid of the
- # regular expression and just unmount everything with type fuse.keep.
+ # Find FUSE mounts under $CRUNCH_TMP and unmount them.
+ # Then clean up work directories.
+ # TODO: When #5036 is done and widely deployed, we can limit mount's
+ # -t option to simply fuse.keep.
srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+ ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
# NOTE(review): presumably reached only if srun() returns instead of
# replacing this process with the command -- confirm against srun().
exit (1);
}
while (1)
freeze_if_want_freeze ($cleanpid);
select (undef, undef, undef, 0.1);
}
- Log (undef, "Cleanup command exited ".exit_status_s($?));
+ if ($?) {
+ Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+ exit(EX_RETRY_UNLOCKED);
+ }
}
# If this job requires a Docker image, install that.
# Give up unless the preceding command exited 0 and $sha1 consists of 40
# lowercase hex digits.  The regex capture is what becomes $commit below,
# so $1 is only read after a successful match.
unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
croak("`$gitcmd rev-list` exited "
.exit_status_s($?)
- .", '$treeish' not found. Giving up.");
+ .", '$treeish' not found, giving up");
}
$commit = $1;
# Record which commit the requested version resolved to.
Log(undef, "Version $treeish is commit $commit");
$command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
}
- # Dynamically configure the container to use the host system as its
- # DNS server. Get the host's global addresses from the ip command,
- # and turn them into docker --dns options using gawk.
- $command .=
- q{$(ip -o address show scope global |
- gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
-
# The source tree and $destdir directory (which we have
# installed on the worker host) are available in the container,
# under the same path.
update_progress_stats();
select (undef, undef, undef, 0.1);
}
- elsif (time - $progress_stats_updated >= 30)
+ elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
{
update_progress_stats();
}
$progress_stats_updated = time;
return if !$progress_is_dirty;
my ($todo, $done, $running) = (scalar @jobstep_todo,
- scalar @jobstep_done,
- scalar @slot - scalar @freeslot - scalar @holdslot);
+ scalar @jobstep_done,
+ scalar keys(%proc));
$Job->{'tasks_summary'} ||= {};
$Job->{'tasks_summary'}->{'todo'} = $todo;
$Job->{'tasks_summary'}->{'done'} = $done;
}
__DATA__
-#!/usr/bin/perl
+#!/usr/bin/env perl
#
# This is crunch-job's internal dispatch script. crunch-job running on the API
# server invokes this script on individual compute nodes, or localhost if we're
my $python_dir = "$install_dir/python";
if ((-d $python_dir) and can_run("python2.7")) {
open(my $egg_info_pipe, "-|",
- "python2.7 \Q$python_dir/setup.py\E --quiet egg_info 2>&1 >/dev/null");
+ "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
my @egg_info_errors = <$egg_info_pipe>;
close($egg_info_pipe);
+
if ($?) {
- if (@egg_info_errors and ($egg_info_errors[-1] =~ /\bgit\b/)) {
+ if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
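# egg_info apparently failed because it couldn't ask git for a build tag:
# its last error line either mentions git or reports [Errno 2] (presumably
# because the git executable could not be found -- confirm).
# Specify no build tag.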
open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
} else {
my $egg_info_exit = $? >> 8;
foreach my $errline (@egg_info_errors) {
- print STDERR_ORIG $errline;
+ warn $errline;
}
warn "python setup.py egg_info failed: exit $egg_info_exit";
exit ($egg_info_exit || 1);