# Determine whether this version of Docker supports memory+swap limits.
($exited, $stdout, $stderr) = srun_sync(
- ["srun", "--nodelist=" . $node[0]],
+ ["srun", "--nodes=1"],
[$docker_bin, 'run', '--help'],
{label => "check --memory-swap feature"});
$docker_limitmem = ($stdout =~ /--memory-swap/);
$try_user_arg = "--user=$try_user";
}
my ($exited, $stdout, $stderr) = srun_sync(
- ["srun", "--nodelist=" . $node[0]],
+ ["srun", "--nodes=1"],
["/bin/sh", "-ec",
"$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
{label => $label});
.q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
.q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
- ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
+ ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
+ # $VOLUME_CRUNCHRUNNER and $VOLUME_CERTS will be passed unquoted as
+ # arguments to `docker run`. They must contain their own quoting.
+ .q{&& VOLUME_CRUNCHRUNNER="" VOLUME_CERTS="" }
+ .q{&& if which crunchrunner >/dev/null ; then VOLUME_CRUNCHRUNNER=\\"--volume=$(which crunchrunner):/usr/local/bin/crunchrunner\\" ; fi }
+ .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUME_CERTS=\\"--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt\\" ; }
+ .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUME_CERTS=\\"--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt\\" ; fi };
$command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
$ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
# For now, use the same approach as TASK_WORK above.
$ENV{"JOB_WORK"} = "/tmp/crunch-job-work";
+ # Bind mount the crunchrunner binary and host TLS certificates file into
+ # the container.
+ $command .= "\$VOLUME_CRUNCHRUNNER \$VOLUME_CERTS ";
+
while (my ($env_key, $env_val) = each %ENV)
{
if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
sub reapchildren
{
my $children_reaped = 0;
- while ((my $pid = waitpid (-1, WNOHANG)) > 0)
+ my @successful_task_uuids = ();
+
+ while((my $pid = waitpid (-1, WNOHANG)) > 0)
{
my $childstatus = $?;
. $slot[$proc{$pid}->{slot}]->{cpu});
my $jobstepidx = $proc{$pid}->{jobstepidx};
- if (!WIFEXITED($childstatus))
- {
- # child did not exit (may be temporarily stopped)
- Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
- next;
- }
-
$children_reaped++;
my $elapsed = time - $proc{$pid}->{time};
my $Jobstep = $jobstep[$jobstepidx];
push @jobstep_todo, $jobstepidx;
$Job->{'tasks_summary'}->{'failed'}++;
}
- else
+ else # task_success
{
+ push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
++$thisround_succeeded;
$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
push @freeslot, $proc{$pid}->{slot};
delete $proc{$pid};
- if ($task_success) {
- # Load new tasks
- my $newtask_list = [];
- my $newtask_results;
- do {
- $newtask_results = api_call(
- "job_tasks/list",
- 'where' => {
- 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
- },
- 'order' => 'qsequence',
- 'offset' => scalar(@$newtask_list),
- );
- push(@$newtask_list, @{$newtask_results->{items}});
- } while (@{$newtask_results->{items}});
- foreach my $arvados_task (@$newtask_list) {
- my $jobstep = {
- 'level' => $arvados_task->{'sequence'},
- 'failures' => 0,
- 'arvados_task' => $arvados_task
- };
- push @jobstep, $jobstep;
- push @jobstep_todo, $#jobstep;
- }
- }
$progress_is_dirty = 1;
}
+ if (scalar(@successful_task_uuids) > 0)
+ {
+ Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
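+ # Load new tasks created by the tasks that just succeeded. The list
+ # call is repeated with an increasing offset until it returns no items.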
+ my $newtask_list = [];
+ my $newtask_results;
+ do {
+ $newtask_results = api_call(
+ "job_tasks/list",
+ 'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
+ 'order' => 'qsequence',
+ 'offset' => scalar(@$newtask_list),
+ );
+ push(@$newtask_list, @{$newtask_results->{items}});
+ } while (@{$newtask_results->{items}});
+ Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
+ foreach my $arvados_task (@$newtask_list) {
+ my $jobstep = {
+ 'level' => $arvados_task->{'sequence'},
+ 'failures' => 0,
+ 'arvados_task' => $arvados_task
+ };
+ push @jobstep, $jobstep;
+ push @jobstep_todo, $#jobstep;
+ }
+ }
+
return $children_reaped;
}
sub preprocess_stderr
{
my $jobstepidx = shift;
+ # slotindex is only defined for children running Arvados job tasks.
+ # Be prepared to handle the undef case (for setup srun calls, etc.).
+ my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
my $line = $1;
# whoa.
$main::please_freeze = 1;
}
- elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
- # Skip the following tempfail checks if this srun proc isn't
- # attached to a particular worker slot.
- }
elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
- my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
- $slot[$job_slot_index]->{node}->{fail_count}++;
$jobstep[$jobstepidx]->{tempfail} = 1;
- ban_node_by_slot($job_slot_index);
+ if (defined($job_slot_index)) {
+ $slot[$job_slot_index]->{node}->{fail_count}++;
+ ban_node_by_slot($job_slot_index);
+ }
}
elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
$jobstep[$jobstepidx]->{tempfail} = 1;
- ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
+ ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
}
elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
$jobstep[$jobstepidx]->{tempfail} = 1;
delete $reader{$jobstepidx};
my $j = pop @jobstep;
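+ # If the srun showed signs of tempfail, or $main::please_freeze is set,
+ # ensure the caller treats that as a failure case: report exit status 255
+ # unless a non-zero exit status was already recorded.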
+ if ($main::please_freeze || $j->{tempfail}) {
+ $exited ||= 255;
+ }
return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
}