X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fdc9a9308c646d23ec50073833f141ceebf78613..b026643583d835a15d3baf6edef2df16ce678307:/sdk/cli/bin/crunch-job diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index 4e5b0826b6..ca6c47bfce 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -126,6 +126,7 @@ my $jobspec; my $job_api_token; my $no_clear_tmp; my $resume_stash; +my $cgroup_root = "/sys/fs/cgroup"; my $docker_bin = "docker.io"; my $docker_run_args = ""; GetOptions('force-unlock' => \$force_unlock, @@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock, 'job-api-token=s' => \$job_api_token, 'no-clear-tmp' => \$no_clear_tmp, 'resume-stash=s' => \$resume_stash, + 'cgroup-root=s' => \$cgroup_root, 'docker-bin=s' => \$docker_bin, 'docker-run-args=s' => \$docker_run_args, ); @@ -432,7 +434,7 @@ fi if ($docker_pid == 0) { srun (["srun", "--nodelist=" . join(',', @node)], - ["/bin/sh", "-ec", $docker_install_script]); + ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script]); exit ($?); } while (1) @@ -443,8 +445,8 @@ fi } if ($? != 0) { - croak("Installing Docker image from $docker_locator exited " - .exit_status_s($?)); + Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?)); + exit(EX_RETRY_UNLOCKED); } # Determine whether this version of Docker supports memory+swap limits. @@ -915,7 +917,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) { my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}"; my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid"; - $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 "; + $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 "; $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy "; # We only set memory limits if Docker lets us limit both memory and swap. # Memory limits alone have been supported longer, but subprocesses tend @@ -995,7 +997,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) } } else { # Non-docker run - $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 "; + $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 "; $command .= $stdbuf; $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"}; } @@ -1483,7 +1485,7 @@ sub preprocess_stderr $jobstep[$job]->{tempfail} = 1; ban_node_by_slot($jobstep[$job]->{slotindex}); } - elsif ($line =~ /arvados\.errors\.Keep/) { + elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) { $jobstep[$job]->{tempfail} = 1; } }