X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b13876638cf57b400bd59513a0a1811b3d2993a1..a70b961cc07a399c123774f2acdc5c118efa2791:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index fd598c9dd3..9343fcfbfd 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -1,10 +1,9 @@ #!/usr/bin/env perl +# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*- # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: AGPL-3.0 -# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*- - =head1 NAME crunch-job: Execute job steps, save snapshots as requested, collate output. @@ -837,8 +836,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) close($_); } fcntl ("writer", F_SETFL, 0) or croak ($!); # no close-on-exec - open(STDOUT,">&writer"); - open(STDERR,">&writer"); + open(STDOUT,">&writer") or croak ($!); + open(STDERR,">&writer") or croak ($!); undef $dbh; undef $sth; @@ -1188,6 +1187,8 @@ sub reapchildren . $slot[$proc{$pid}->{slot}]->{cpu}); my $jobstepidx = $proc{$pid}->{jobstepidx}; + readfrompipes_after_exit ($jobstepidx); + $children_reaped++; my $elapsed = time - $proc{$pid}->{time}; my $Jobstep = $jobstep[$jobstepidx]; @@ -1259,7 +1260,6 @@ sub reapchildren $Jobstep->{finishtime} = time; $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime}); retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API"); - process_stderr_final ($jobstepidx); Log ($jobstepidx, sprintf("task output (%d bytes): %s", length($Jobstep->{'arvados_task'}->{output}), $Jobstep->{'arvados_task'}->{output})); @@ -1562,9 +1562,27 @@ sub preprocess_stderr } -sub process_stderr_final +# Read whatever is still available on its stderr+stdout pipes after +# the given child process has exited. +sub readfrompipes_after_exit { my $jobstepidx = shift; + + # The fact that the child has exited allows some convenient + # simplifications: (1) all data must have already been written, so + # there's no need to wait for more once sysread returns 0; (2) the + # total amount of data available is bounded by the pipe buffer size, + # so it's safe to read everything into one string. + my $buf; + while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) { + $jobstep[$jobstepidx]->{stderr_at} = time; + $jobstep[$jobstepidx]->{stderr} .= $buf; + } + if ($jobstep[$jobstepidx]->{stdout_r}) { + while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) { + $jobstep[$jobstepidx]->{stdout_captured} .= $buf; + } + } preprocess_stderr ($jobstepidx); map { @@ -1996,8 +2014,8 @@ sub srun_sync close($stdout_r); fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec fcntl($stdout_w, F_SETFL, 0) or croak($!); - open(STDERR, ">&", $stderr_w); - open(STDOUT, ">&", $stdout_w); + open(STDERR, ">&", $stderr_w) or croak ($!); + open(STDOUT, ">&", $stdout_w) or croak ($!); srun ($srunargs, $execargs, $opts, $stdin); exit (1); } @@ -2036,8 +2054,7 @@ sub srun_sync } my $exited = $?; - 1 while readfrompipes(); - process_stderr_final ($jobstepidx); + readfrompipes_after_exit ($jobstepidx); Log (undef, "$label: exit ".exit_status_s($exited)); @@ -2493,8 +2510,8 @@ if ((-d $python_dir) and can_run("python2.7")) { # Hide messages from the install script (unless it fails: shell_or_die # will show $destdir.log in that case). -open(STDOUT, ">>", "$destdir.log"); -open(STDERR, ">&", STDOUT); +open(STDOUT, ">>", "$destdir.log") or die ($!); +open(STDERR, ">&", STDOUT) or die ($!); if (-e "$destdir/crunch_scripts/install") { shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir); @@ -2515,7 +2532,7 @@ close L; sub can_run { my $command_name = shift; - open(my $which, "-|", "which", $command_name); + open(my $which, "-|", "which", $command_name) or die ($!); while (<$which>) { } close($which); return ($? == 0);