From: Tom Clegg
Date: Wed, 2 Mar 2016 16:05:19 +0000 (-0500)
Subject: 8343: Allow overriding -cgroup-root=/sys/fs/cgroup via crunch-dispatch run script.
X-Git-Tag: 1.1.0~1090^2
X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/dabddd30051245241a2ca02c30e8354d68d9eb2b

8343: Allow overriding -cgroup-root=/sys/fs/cgroup via crunch-dispatch run script.
---

diff --git a/doc/install/install-crunch-dispatch.html.textile.liquid b/doc/install/install-crunch-dispatch.html.textile.liquid
index 0e5be9411a..fda0769abf 100644
--- a/doc/install/install-crunch-dispatch.html.textile.liquid
+++ b/doc/install/install-crunch-dispatch.html.textile.liquid
@@ -184,6 +184,10 @@ export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
 export HOME=$(pwd)
 export RAILS_ENV=production
 
+## Uncomment and edit this line if your compute nodes have cgroup info
+## somewhere other than /sys/fs/cgroup (e.g., "/cgroup" for CentOS 6)
+#export CRUNCH_CGROUP_ROOT="/sys/fs/cgroup"
+
 ## Uncomment this line if your cluster uses self-signed SSL certificates:
 #export ARVADOS_API_HOST_INSECURE=yes
 
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 6bd14234ea..ca6c47bfce 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -126,6 +126,7 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
+my $cgroup_root = "/sys/fs/cgroup";
 my $docker_bin = "docker.io";
 my $docker_run_args = "";
 GetOptions('force-unlock' => \$force_unlock,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
            'job-api-token=s' => \$job_api_token,
            'no-clear-tmp' => \$no_clear_tmp,
            'resume-stash=s' => \$resume_stash,
+           'cgroup-root=s' => \$cgroup_root,
            'docker-bin=s' => \$docker_bin,
            'docker-run-args=s' => \$docker_run_args,
     );
@@ -915,7 +917,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     {
       my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
       my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
-      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
+      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
       $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
       # We only set memory limits if Docker lets us limit both memory and swap.
       # Memory limits alone have been supported longer, but subprocesses tend
@@ -995,7 +997,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       }
     } else {
       # Non-docker run
-      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
+      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
       $command .= $stdbuf;
       $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
     }
diff --git a/services/api/lib/crunch_dispatch.rb b/services/api/lib/crunch_dispatch.rb
index b59279e554..11270c39ce 100644
--- a/services/api/lib/crunch_dispatch.rb
+++ b/services/api/lib/crunch_dispatch.rb
@@ -24,6 +24,7 @@ class CrunchDispatch
 
     @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
     @docker_run_args = ENV['CRUNCH_JOB_DOCKER_RUN_ARGS']
+    @cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
 
     @arvados_internal = Rails.configuration.git_internal_dir
     if not File.exists? @arvados_internal
@@ -384,6 +385,10 @@ class CrunchDispatch
                   '--job', job.uuid,
                   '--git-dir', @arvados_internal]
 
+      if @cgroup_root
+        cmd_args += ['--cgroup-root', @cgroup_root]
+      end
+
       if @docker_bin
         cmd_args += ['--docker-bin', @docker_bin]
       end
diff --git a/services/api/test/unit/crunch_dispatch_test.rb b/services/api/test/unit/crunch_dispatch_test.rb
index 09f4af6373..900c8e33cb 100644
--- a/services/api/test/unit/crunch_dispatch_test.rb
+++ b/services/api/test/unit/crunch_dispatch_test.rb
@@ -1,7 +1,10 @@
 require 'test_helper'
 require 'crunch_dispatch'
+require 'helpers/git_test_helper'
 
 class CrunchDispatchTest < ActiveSupport::TestCase
+  include GitTestHelper
+
   test 'choose cheaper nodes first' do
     act_as_system_user do
       # Replace test fixtures with a set suitable for testing dispatch
@@ -100,6 +103,30 @@ class CrunchDispatchTest < ActiveSupport::TestCase
     end
   end
 
+  test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
+    ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
+    Rails.configuration.crunch_job_wrapper = :none
+    act_as_system_user do
+      j = Job.create(repository: 'active/foo',
+                     script: 'hash',
+                     script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
+                     script_parameters: {})
+      ok = false
+      Open3.expects(:popen3).at_least_once.with do |*args|
+        if args.index(j.uuid)
+          ok = ((i = args.index '--cgroup-root') and
+                (args[i+1] == '/path/to/cgroup'))
+        end
+        true
+      end.raises(StandardError.new('all is well'))
+      dispatch = CrunchDispatch.new
+      dispatch.parse_argv ['--jobs']
+      dispatch.refresh_todo
+      dispatch.start_jobs
+      assert ok
+    end
+  end
+
   def assert_with_timeout timeout, message
     t = 0
     while (t += 0.1) < timeout