X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1d6bcbbdbc374aa25b70c85717f987d2675c1205..d7e559136a7ed5ee3f178b26aa2b5924cea1bca7:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index a545cfde0e..4e5b0826b6 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -127,6 +127,7 @@ my $job_api_token; my $no_clear_tmp; my $resume_stash; my $docker_bin = "docker.io"; +my $docker_run_args = ""; GetOptions('force-unlock' => \$force_unlock, 'git-dir=s' => \$git_dir, 'job=s' => \$jobspec, @@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock, 'no-clear-tmp' => \$no_clear_tmp, 'resume-stash=s' => \$resume_stash, 'docker-bin=s' => \$docker_bin, + 'docker-run-args=s' => \$docker_run_args, ); if (defined $job_api_token) { @@ -413,11 +415,13 @@ if (!defined $no_clear_tmp) { # If this job requires a Docker image, install that. my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg); if ($docker_locator = $Job->{docker_image_locator}) { + Log (undef, "Install docker image $docker_locator"); ($docker_stream, $docker_hash) = find_docker_image($docker_locator); if (!$docker_hash) { croak("No Docker image hash found from locator $docker_locator"); } + Log (undef, "docker image hash is $docker_hash"); $docker_stream =~ s/^\.//; my $docker_install_script = qq{ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then @@ -467,7 +471,7 @@ fi } srun(["srun", "--nodelist=" . $node[0]], ["/bin/sh", "-ec", - "a=`$docker_bin run $try_user_arg $docker_hash id --user` && " . + "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " . " test \$a -ne 0"], {fork => 1}); if ($? == 0) { @@ -870,11 +874,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu}; $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname"; $ENV{"HOME"} = $ENV{"TASK_WORK"}; - $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep"; $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}}; $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"}; + my $keep_mnt = $ENV{"TASK_WORK"}.".keep"; + $ENV{"GZIP"} = "-n"; my @srunargs = ( @@ -886,23 +891,32 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) my $stdbuf = " stdbuf --output=0 --error=0 "; + my $arv_file_cache = ""; + if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) { + $arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024); + } + my $command = - "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; " - ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} " - ."&& cd $ENV{CRUNCH_TMP} " + "if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; " + ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E " + ."&& cd \Q$ENV{CRUNCH_TMP}\E " # These environment variables get used explicitly later in # $command. No tool is expected to read these values directly. .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' = 30 || $progress_is_dirty) { update_progress_stats(); } + if (!$gotsome) { + select (undef, undef, undef, 0.1); + } $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 && $_->{node}->{hold_count} < 4 } @slot); if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) || @@ -1186,7 +1206,7 @@ sub reapchildren if (!defined $task_success) { # task did not indicate one way or the other --> fail Log($jobstepid, sprintf( - "ERROR: Task process exited %d, but never updated its task record to indicate success and record its output.", + "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.", exit_status_s($childstatus))); $Jobstep->{'arvados_task'}->{success} = 0; $Jobstep->{'arvados_task'}->save; @@ -1324,8 +1344,9 @@ sub check_squeue # squeue check interval (15s) this should make the squeue check an # infrequent event. my $silent_procs = 0; - for my $jobstep (values %proc) + for my $procinfo (values %proc) { + my $jobstep = $jobstep[$procinfo->{jobstep}]; if ($jobstep->{stderr_at} < $last_squeue_check) { $silent_procs++; @@ -1334,17 +1355,18 @@ sub check_squeue return if $silent_procs == 0; # use killem() on procs whose killtime is reached - while (my ($pid, $jobstep) = each %proc) + while (my ($pid, $procinfo) = each %proc) { - if (exists $jobstep->{killtime} - && $jobstep->{killtime} <= time + my $jobstep = $jobstep[$procinfo->{jobstep}]; + if (exists $procinfo->{killtime} + && $procinfo->{killtime} <= time && $jobstep->{stderr_at} < $last_squeue_check) { my $sincewhen = ""; if ($jobstep->{stderr_at}) { $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s"; } - Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)"); + Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)"); killem ($pid); } } @@ -1379,12 +1401,12 @@ sub check_squeue } # Check for child procs >60s old and not mentioned by squeue. - while (my ($pid, $jobstep) = each %proc) + while (my ($pid, $procinfo) = each %proc) { - if ($jobstep->{time} < time - 60 - && $jobstep->{jobstepname} - && !exists $ok{$jobstep->{jobstepname}} - && !exists $jobstep->{killtime}) + if ($procinfo->{time} < time - 60 + && $procinfo->{jobstepname} + && !exists $ok{$procinfo->{jobstepname}} + && !exists $procinfo->{killtime}) { # According to slurm, this task has ended (successfully or not) # -- but our srun child hasn't exited. First we must wait (30 @@ -1393,8 +1415,8 @@ sub check_squeue # terminated, we'll conclude some slurm communication # error/delay has caused the task to die without notifying srun, # and we'll kill srun ourselves. - $jobstep->{killtime} = time + 30; - Log($jobstep->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited"); + $procinfo->{killtime} = time + 30; + Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited"); } } } @@ -1416,15 +1438,21 @@ sub readfrompipes foreach my $job (keys %reader) { my $buf; - while (0 < sysread ($reader{$job}, $buf, 8192)) + if (0 < sysread ($reader{$job}, $buf, 65536)) { print STDERR $buf if $ENV{CRUNCH_DEBUG}; $jobstep[$job]->{stderr_at} = time; $jobstep[$job]->{stderr} .= $buf; + + # Consume everything up to the last \n preprocess_stderr ($job); + if (length ($jobstep[$job]->{stderr}) > 16384) { - substr ($jobstep[$job]->{stderr}, 0, 8192) = ""; + # If we get a lot of stderr without a newline, chop off the + # front to avoid letting our buffer grow indefinitely. + substr ($jobstep[$job]->{stderr}, + 0, length($jobstep[$job]->{stderr}) - 8192) = ""; } $gotsome = 1; } @@ -1445,7 +1473,7 @@ sub preprocess_stderr # whoa. $main::please_freeze = 1; } - elsif ($line =~ /srun: error: Node failure on/) { + elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) { my $job_slot_index = $jobstep[$job]->{slotindex}; $slot[$job_slot_index]->{node}->{fail_count}++; $jobstep[$job]->{tempfail} = 1;