X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d501ef45757f78f5c1d2843eccf8016e8978345e..313f5fedd4214d077e2b5c7c26bab4df3895c44a:/sdk/cli/bin/crunch-job?ds=sidebyside diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index 9bad7ae547..617d22f4d1 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -86,6 +86,7 @@ use POSIX ':sys_wait_h'; use POSIX qw(strftime); use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); use Arvados; +use Data::Dumper; use Digest::MD5 qw(md5_hex); use Getopt::Long; use IPC::Open2; @@ -357,7 +358,7 @@ if (!defined $no_clear_tmp) { if ($cleanpid == 0) { srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}], - ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_TMP/opt $CRUNCH_TMP/src*']); + ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep $CRUNCH_TMP/task/*.keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src*']); exit (1); } while (1) @@ -547,8 +548,6 @@ else { my @execargs = ("sh", "-c", "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -"); - # Note: this section is almost certainly unnecessary if we're - # running tasks in docker containers. my $installpid = fork(); if ($installpid == 0) { @@ -694,7 +693,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) } $ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name}; $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu}; - $ENV{"TASK_WORK"} = $ENV{"JOB_WORK"}."/$id.$$"; + $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname"; $ENV{"HOME"} = $ENV{"TASK_WORK"}; $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep"; $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated @@ -723,36 +722,54 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec "; if ($docker_hash) { - $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=10000 "; - $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --user=crunch --cidfile=$ENV{TASK_WORK}/docker.cid "; + my $cidfile = "$ENV{CRUNCH_TMP}/$ENV{TASK_UUID}.cid"; + $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 "; + $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy "; + # Dynamically configure the container to use the host system as its # DNS server. Get the host's global addresses from the ip command, # and turn them into docker --dns options using gawk. $command .= q{$(ip -o address show scope global | gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') }; - $command .= "--volume=\Q$ENV{CRUNCH_SRC}:/tmp/crunch-src:ro\E "; + + # The source tree and $destdir directory (which we have + # installed on the worker host) are available in the container, + # under the same path. + $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E "; + $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E "; + + # Currently, we make arv-mount's mount point appear at /keep + # inside the container (instead of using the same path as the + # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However, + # crunch scripts and utilities must not rely on this. They must + # use $TASK_KEEPMOUNT. $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E "; - $command .= "--env=\QHOME=/home/crunch\E "; + $ENV{TASK_KEEPMOUNT} = "/keep"; + + # TASK_WORK is a plain docker data volume: it starts out empty, + # is writable, and persists until no containers use it any + # more. We don't use --volumes-from to share it with other + # containers: it is only accessible to this task, and it goes + # away when this task stops. + $command .= "--volume=\Q$ENV{TASK_WORK}\E "; + + # JOB_WORK is also a plain docker data volume for now. TODO: + # Share a single JOB_WORK volume across all task containers on a + # given worker node, and delete it when the job ends (and, in + # case that doesn't work, when the next job starts). + $command .= "--volume=\Q$ENV{JOB_WORK}\E "; + while (my ($env_key, $env_val) = each %ENV) { - if ($env_key =~ /^(ARVADOS|JOB|TASK)_/) { - if ($env_key eq "TASK_WORK") { - $command .= "--env=\QTASK_WORK=/tmp/crunch-job\E "; - } - elsif ($env_key eq "TASK_KEEPMOUNT") { - $command .= "--env=\QTASK_KEEPMOUNT=/keep\E "; - } - else { - $command .= "--env=\Q$env_key=$env_val\E "; - } + if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) { + $command .= "--env=\Q$env_key=$env_val\E "; } } - $command .= "--env=\QCRUNCH_NODE_SLOTS=$ENV{CRUNCH_NODE_SLOTS}\E "; - $command .= "--env=\QCRUNCH_SRC=/tmp/crunch-src\E "; + $command .= "--env=\QHOME=$ENV{HOME}\E "; $command .= "\Q$docker_hash\E "; $command .= "stdbuf --output=0 --error=0 "; - $command .= "/tmp/crunch-src/crunch_scripts/" . $Job->{"script"}; + $command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"}; } else { # Non-docker run $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 "; @@ -763,8 +780,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) my @execargs = ('bash', '-c', $command); srun (\@srunargs, \@execargs, undef, $build_script_to_send); # exec() failed, we assume nothing happened. - Log(undef, "srun() failed on build script"); - die; + die "srun() failed on build script\n"; } close("writer"); if (!defined $childpid) @@ -1555,11 +1571,13 @@ sub srun my $opts = shift || {}; my $stdin = shift; my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs; - print STDERR (join (" ", - map { / / ? "'$_'" : $_ } - (@$args)), - "\n") - if $ENV{CRUNCH_DEBUG}; + + $Data::Dumper::Terse = 1; + $Data::Dumper::Indent = 0; + my $show_cmd = Dumper($args); + $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g; + $show_cmd =~ s/\n/ /g; + warn "starting: $show_cmd\n"; if (defined $stdin) { my $child = open STDIN, "-|"; @@ -1692,7 +1710,7 @@ __DATA__ # checkout-and-build use Fcntl ':flock'; -use File::Path qw( make_path ); +use File::Path qw( make_path remove_tree ); my $destdir = $ENV{"CRUNCH_SRC"}; my $commit = $ENV{"CRUNCH_SRC_COMMIT"}; @@ -1700,12 +1718,17 @@ my $repo = $ENV{"CRUNCH_SRC_URL"}; my $task_work = $ENV{"TASK_WORK"}; for my $dir ($destdir, $task_work) { - if ($dir) { - make_path $dir; - -e $dir or die "Failed to create temporary directory ($dir): $!"; - } + if ($dir) { + make_path $dir; + -e $dir or die "Failed to create temporary directory ($dir): $!"; + } } +if ($task_work) { + remove_tree($task_work, {keep_root => 1}); +} + + open L, ">", "$destdir.lock" or die "$destdir.lock: $!"; flock L, LOCK_EX; if (readlink ("$destdir.commit") eq $commit && -d $destdir) { @@ -1774,10 +1797,11 @@ sub shell_or_die print STDERR "@_\n"; } if (system (@_) != 0) { + my $err = $!; my $exitstatus = sprintf("exit %d signal %d", $? >> 8, $? & 0x7f); open STDERR, ">&STDERR_ORIG"; system ("cat $destdir.log >&2"); - die "@_ failed ($!): $exitstatus"; + die "@_ failed ($err): $exitstatus"; } }