X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aabde5c5f0cba198c90558a3f3bb100ea9a09b6d..d5142a69848df7fa506b8cb16a76cb621598769a:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index cc47bbeb5d..6ae0481232 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -96,6 +96,7 @@ use File::Temp; use Fcntl ':flock'; use File::Path qw( make_path remove_tree ); +use constant TASK_TEMPFAIL => 111; use constant EX_TEMPFAIL => 75; $ENV{"TMPDIR"} ||= "/tmp"; @@ -375,7 +376,7 @@ if (!defined $no_clear_tmp) { # If this job requires a Docker image, install that. my $docker_bin = "/usr/bin/docker.io"; -my ($docker_locator, $docker_stream, $docker_hash); +my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem); if ($docker_locator = $Job->{docker_image_locator}) { ($docker_stream, $docker_hash) = find_docker_image($docker_locator); if (!$docker_hash) @@ -407,6 +408,12 @@ fi .exit_status_s($?)); } + # Determine whether this version of Docker supports memory+swap limits. + srun(["srun", "--nodelist=" . $node[0]], + ["/bin/sh", "-ec", "$docker_bin run --help | grep -qe --memory-swap="], + {fork => 1}); + $docker_limitmem = ($? == 0); + if ($Job->{arvados_sdk_version}) { # The job also specifies an Arvados SDK version. Add the SDKs to the # tar file for the build script to install. @@ -636,12 +643,44 @@ my $thisround_failed_multiple = 0; @jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level} or $a <=> $b } @jobstep_todo; my $level = $jobstep[$jobstep_todo[0]]->{level}; -Log (undef, "start level $level"); +my $initial_tasks_this_level = 0; +foreach my $id (@jobstep_todo) { + $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level); +} + +# If the number of tasks scheduled at this level #T is smaller than the number +# of slots available #S, only use the first #T slots, or the first slot on +# each node, whichever number is greater. +# +# When we dispatch tasks later, we'll allocate whole-node resources like RAM +# based on these numbers. Using fewer slots makes more resources available +# to each individual task, which should normally be a better strategy when +# there are fewer of them running with less parallelism. +# +# Note that this calculation is not redone if the initial tasks at +# this level queue more tasks at the same level. This may harm +# overall task throughput for that level. +my @freeslot; +if ($initial_tasks_this_level < @node) { + @freeslot = (0..$#node); +} elsif ($initial_tasks_this_level < @slot) { + @freeslot = (0..$initial_tasks_this_level - 1); +} else { + @freeslot = (0..$#slot); +} +my $round_num_freeslots = scalar(@freeslot); +my %round_max_slots = (); +for (my $ii = $#freeslot; $ii >= 0; $ii--) { + my $this_slot = $slot[$freeslot[$ii]]; + my $node_name = $this_slot->{node}->{name}; + $round_max_slots{$node_name} ||= $this_slot->{cpu}; + last if (scalar(keys(%round_max_slots)) >= @node); +} +Log(undef, "start level $level with $round_num_freeslots slots"); my %proc; -my @freeslot = (0..$#slot); my @holdslot; my %reader; my $progress_is_dirty = 1; @@ -650,7 +689,6 @@ my $progress_stats_updated = 0; update_progress_stats(); - THISROUND: for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) { @@ -704,7 +742,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) $ENV{"HOME"} = $ENV{"TASK_WORK"}; $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep"; $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated - $ENV{"CRUNCH_NODE_SLOTS"} = $slot[$childslot]->{node}->{ncpus}; + $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}}; $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"}; $ENV{"GZIP"} = "-n"; @@ -718,13 +756,26 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) my $command = "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; " ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} " - ."&& cd $ENV{CRUNCH_TMP} "; + ."&& cd $ENV{CRUNCH_TMP} " + # These environment variables get used explicitly later in + # $command. No tool is expected to read these values directly. + .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' @freeslot && $todo_ptr+1 > $#jobstep_todo)) + ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo)) { last THISROUND if $main::please_freeze || defined($main::success); if ($main::please_info) @@ -1011,7 +1062,7 @@ sub reapchildren { my $temporary_fail; $temporary_fail ||= $Jobstep->{node_fail}; - $temporary_fail ||= ($exitvalue == 111); + $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL); ++$thisround_failed; ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1; @@ -1232,7 +1283,7 @@ sub preprocess_stderr # whoa. $main::please_freeze = 1; } - elsif ($line =~ /srun: error: (Node failure on|Unable to create job step) /) { + elsif ($line =~ /(srun: error: (Node failure on|Unable to create job step|.*: Communication connection failure))|arvados.errors.Keep/) { $jobstep[$job]->{node_fail} = 1; ban_node_by_slot($jobstep[$job]->{slotindex}); } @@ -1831,6 +1882,8 @@ use Fcntl ':flock'; use File::Path qw( make_path remove_tree ); use POSIX qw(getcwd); +use constant TASK_TEMPFAIL => 111; + # Map SDK subdirectories to the path environments they belong to. my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB"); @@ -1876,9 +1929,9 @@ if (@ARGV) { my $venv_dir = "$job_work/.arvados.venv"; my $venv_built = -e "$venv_dir/bin/activate"; if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) { - shell_or_die("virtualenv", "--quiet", "--system-site-packages", + shell_or_die(undef, "virtualenv", "--quiet", "--system-site-packages", "--python=python2.7", $venv_dir); - shell_or_die("$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src); + shell_or_die(TASK_TEMPFAIL, "$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src); $venv_built = 1; $Log->("Built Python SDK virtualenv"); } @@ -1974,12 +2027,12 @@ if ((-d $python_dir) and can_run("python2.7") and } if (-e "$destdir/crunch_scripts/install") { - shell_or_die ("$destdir/crunch_scripts/install", $install_dir); + shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir); } elsif (!-e "./install.sh" && -e "./tests/autotests.sh") { # Old version - shell_or_die ("./tests/autotests.sh", $install_dir); + shell_or_die (undef, "./tests/autotests.sh", $install_dir); } elsif (-e "./install.sh") { - shell_or_die ("./install.sh", $install_dir); + shell_or_die (undef, "./install.sh", $install_dir); } if ($commit) { @@ -2000,15 +2053,24 @@ sub can_run { sub shell_or_die { + my $exitcode = shift; + if ($ENV{"DEBUG"}) { print STDERR "@_\n"; } if (system (@_) != 0) { my $err = $!; - my $exitstatus = sprintf("exit %d signal %d", $? >> 8, $? & 0x7f); + my $code = $?; + my $exitstatus = sprintf("exit %d signal %d", $code >> 8, $code & 0x7f); open STDERR, ">&STDERR_ORIG"; system ("cat $destdir.log >&2"); - die "@_ failed ($err): $exitstatus"; + warn "@_ failed ($err): $exitstatus"; + if (defined($exitcode)) { + exit $exitcode; + } + else { + exit (($code >> 8) || 1); + } } }