X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a1dc811844d2dc76bea5ebfdc2f571a12cb41b49..fcbb743e3de63e93280f2fbeedea49f98430d26f:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index 70ba04fcab..5eb2f902f9 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -127,6 +127,7 @@ my $job_api_token; my $no_clear_tmp; my $resume_stash; my $docker_bin = "docker.io"; +my $docker_run_args = ""; GetOptions('force-unlock' => \$force_unlock, 'git-dir=s' => \$git_dir, 'job=s' => \$jobspec, @@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock, 'no-clear-tmp' => \$no_clear_tmp, 'resume-stash=s' => \$resume_stash, 'docker-bin=s' => \$docker_bin, + 'docker-run-args=s' => \$docker_run_args, ); if (defined $job_api_token) { @@ -467,7 +469,7 @@ fi } srun(["srun", "--nodelist=" . $node[0]], ["/bin/sh", "-ec", - "a=`$docker_bin run $try_user_arg $docker_hash id --user` && " . + "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " . " test \$a -ne 0"], {fork => 1}); if ($? == 0) { @@ -870,11 +872,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu}; $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname"; $ENV{"HOME"} = $ENV{"TASK_WORK"}; - $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep"; $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}}; $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"}; + my $keep_mnt = $ENV{"TASK_WORK"}.".keep"; + $ENV{"GZIP"} = "-n"; my @srunargs = ( @@ -886,23 +889,32 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) my $stdbuf = " stdbuf --output=0 --error=0 "; + my $arv_file_cache = ""; + if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) { + $arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024); + } + my $command = - "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; " - ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} " - ."&& cd $ENV{CRUNCH_TMP} " + "if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; " + ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E " + ."&& cd \Q$ENV{CRUNCH_TMP}\E " # These environment variables get used explicitly later in # $command. No tool is expected to read these values directly. .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' &2 2>/dev/null; " . + "mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " . "if which stdbuf >/dev/null ; then " . " exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" . " else " . @@ -1182,6 +1201,9 @@ sub reapchildren if (!defined $task_success) { # task did not indicate one way or the other --> fail + Log($jobstepid, sprintf( + "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.", + exit_status_s($childstatus))); $Jobstep->{'arvados_task'}->{success} = 0; $Jobstep->{'arvados_task'}->save; $task_success = 0; @@ -1439,7 +1461,7 @@ sub preprocess_stderr # whoa. $main::please_freeze = 1; } - elsif ($line =~ /srun: error: Node failure on/) { + elsif ($line =~ /srun: error: (Node failure on|Aborting, io error)/) { my $job_slot_index = $jobstep[$job]->{slotindex}; $slot[$job_slot_index]->{node}->{fail_count}++; $jobstep[$job]->{tempfail} = 1; @@ -2166,10 +2188,11 @@ if (@ARGV) { $Log->("Built Python SDK virtualenv"); } - my $pip_bin = "pip"; + my @pysdk_version_cmd = ("python", "-c", + "from pkg_resources import get_distribution as get; print get('arvados-python-client').version"); if ($venv_built) { $Log->("Running in Python SDK virtualenv"); - $pip_bin = "$venv_dir/bin/pip"; + @pysdk_version_cmd = (); my $orig_argv = join(" ", map { quotemeta($_); } @ARGV); @ARGV = ("/bin/sh", "-ec", ". \Q$venv_dir/bin/activate\E; exec $orig_argv"); @@ -2178,14 +2201,18 @@ if (@ARGV) { "\$PATH. Can't install Python SDK."); } - my $pkgs = `(\Q$pip_bin\E freeze 2>/dev/null | grep arvados) || dpkg-query --show '*arvados*'`; - if ($pkgs) { - $Log->("Using Arvados SDK:"); - foreach my $line (split /\n/, $pkgs) { - $Log->($line); + if (@pysdk_version_cmd) { + open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd); + my $pysdk_version = <$pysdk_version_pipe>; + close($pysdk_version_pipe); + if ($? == 0) { + chomp($pysdk_version); + $Log->("Using Arvados SDK version $pysdk_version"); + } else { + # A lot could've gone wrong here, but pretty much all of it means that + # Python won't be able to load the Arvados SDK. + $Log->("Warning: Arvados SDK not found"); } - } else { - $Log->("Arvados SDK packages not found"); } while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {