X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a4acb3ae95b2fc7f4b5f1e174c910a54cc6681da..0c0f18dfbcdcf552889258b76563315fbe2eb060:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index 555c4d19a6..5177434b25 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -467,7 +467,7 @@ fi } srun(["srun", "--nodelist=" . $node[0]], ["/bin/sh", "-ec", - "a=`$docker_bin run --rm $try_user_arg $docker_hash id --user` && " . + "a=`$docker_bin run $try_user_arg $docker_hash id --user` && " . " test \$a -ne 0"], {fork => 1}); if ($? == 0) { @@ -820,6 +820,9 @@ update_progress_stats(); THISROUND: for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) { + # Don't create new tasks if we already know the job's final result. + last if defined($main::success); + my $id = $jobstep_todo[$todo_ptr]; my $Jobstep = $jobstep[$id]; if ($Jobstep->{level} != $level) @@ -893,12 +896,13 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' &2 2>/dev/null; " . + "mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " . "if which stdbuf >/dev/null ; then " . " exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" . " else " . @@ -1017,7 +1024,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) || ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo)) { - last THISROUND if $main::please_freeze || defined($main::success); + last THISROUND if $main::please_freeze; if ($main::please_info) { $main::please_info = 0; @@ -1178,6 +1185,9 @@ sub reapchildren if (!defined $task_success) { # task did not indicate one way or the other --> fail + Log($jobstepid, sprintf( + "ERROR: Task process exited %d, but never updated its task record to indicate success and record its output.", + exit_status_s($childstatus))); $Jobstep->{'arvados_task'}->{success} = 0; $Jobstep->{'arvados_task'}->save; $task_success = 0; @@ -1694,20 +1704,24 @@ sub log_writer_finish() close($log_pipe_in); + my $logger_failed = 0; my $read_result = log_writer_read_output(120); if ($read_result == -1) { + $logger_failed = -1; Log (undef, "timed out reading from 'arv-put'"); } elsif ($read_result != 0) { + $logger_failed = -2; Log(undef, "failed to read arv-put log manifest to EOF"); } waitpid($log_pipe_pid, 0); if ($?) { + $logger_failed ||= $?; Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?)) } close($log_pipe_out); - my $arv_put_output = $log_pipe_out_buf; + my $arv_put_output = $logger_failed ? undef : $log_pipe_out_buf; $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf = $log_pipe_out_select = undef; @@ -1773,13 +1787,13 @@ sub save_meta my $justcheckpoint = shift; # false if this will be the last meta saved return if $justcheckpoint; # checkpointing is not relevant post-Warehouse.pm return unless log_writer_is_active(); + my $log_manifest = log_writer_finish(); + return unless defined($log_manifest); - my $log_manifest = ""; if ($Job->{log}) { my $prev_log_coll = api_call("collections/get", uuid => $Job->{log}); - $log_manifest .= $prev_log_coll->{manifest_text}; + $log_manifest = $prev_log_coll->{manifest_text} . $log_manifest; } - $log_manifest .= log_writer_finish(); my $log_coll = api_call( "collections/create", ensure_unique_name => 1, collection => { @@ -2158,10 +2172,11 @@ if (@ARGV) { $Log->("Built Python SDK virtualenv"); } - my $pip_bin = "pip"; + my @pysdk_version_cmd = ("python", "-c", + "from pkg_resources import get_distribution as get; print get('arvados-python-client').version"); if ($venv_built) { $Log->("Running in Python SDK virtualenv"); - $pip_bin = "$venv_dir/bin/pip"; + @pysdk_version_cmd = (); my $orig_argv = join(" ", map { quotemeta($_); } @ARGV); @ARGV = ("/bin/sh", "-ec", ". \Q$venv_dir/bin/activate\E; exec $orig_argv"); @@ -2170,14 +2185,18 @@ if (@ARGV) { "\$PATH. Can't install Python SDK."); } - my $pkgs = `(\Q$pip_bin\E freeze 2>/dev/null | grep arvados) || dpkg-query --show '*arvados*'`; - if ($pkgs) { - $Log->("Using Arvados SDK:"); - foreach my $line (split /\n/, $pkgs) { - $Log->($line); + if (@pysdk_version_cmd) { + open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd); + my $pysdk_version = <$pysdk_version_pipe>; + close($pysdk_version_pipe); + if ($? == 0) { + chomp($pysdk_version); + $Log->("Using Arvados SDK version $pysdk_version"); + } else { + # A lot could've gone wrong here, but pretty much all of it means that + # Python won't be able to load the Arvados SDK. + $Log->("Warning: Arvados SDK not found"); } - } else { - $Log->("Arvados SDK packages not found"); } while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {