X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fbd54468b13466839c24d880a3d041d0a49371af..82225f2eeb39f6798ff83e979c28698ff617d414:/sdk/cli/bin/crunch-job

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index ca9db1dacd..86e018cc99 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -126,6 +126,7 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
+my $cgroup_root = "/sys/fs/cgroup";
 my $docker_bin = "docker.io";
 my $docker_run_args = "";
 GetOptions('force-unlock' => \$force_unlock,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
            'job-api-token=s' => \$job_api_token,
            'no-clear-tmp' => \$no_clear_tmp,
            'resume-stash=s' => \$resume_stash,
+           'cgroup-root=s' => \$cgroup_root,
            'docker-bin=s' => \$docker_bin,
            'docker-run-args=s' => \$docker_run_args,
     );
@@ -430,7 +432,7 @@ fi

 # Determine whether this version of Docker supports memory+swap limits.
 ($exited, $stdout, $stderr) = srun_sync(
-  ["srun", "--nodelist=" . $node[0]],
+  ["srun", "--nodes=1"],
   [$docker_bin, 'run', '--help'],
   {label => "check --memory-swap feature"});
 $docker_limitmem = ($stdout =~ /--memory-swap/);
@@ -453,7 +455,7 @@ fi
      $try_user_arg = "--user=$try_user";
    }
    my ($exited, $stdout, $stderr) = srun_sync(
-     ["srun", "--nodelist=" . $node[0]],
+     ["srun", "--nodes=1"],
      ["/bin/sh", "-ec",
       "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
      {label => $label});
@@ -850,7 +852,10 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
-        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
+        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
+        ."&& if which crunchrunner >/dev/null ; then VOLUME_CRUNCHRUNNER=\"--volume=\$(which crunchrunner):/usr/local/bin/crunchrunner\" ; fi "
+        ."&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUME_CERTS=\"--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt\" ; fi "
+        ."&& if test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUME_CERTS=\"--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt\" ; fi ";

     $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
     $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
@@ -860,7 +865,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
   {
     my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
     my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
-    $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
+    $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
     $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
     # We only set memory limits if Docker lets us limit both memory and swap.
     # Memory limits alone have been supported longer, but subprocesses tend
@@ -915,6 +920,10 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     # For now, use the same approach as TASK_WORK above.
     $ENV{"JOB_WORK"} = "/tmp/crunch-job-work";

+    # Bind mount the crunchrunner binary and host TLS certificates file into
+    # the container.
+ $command .= "\"\$VOLUME_CRUNCHRUNNER\" \"\$VOLUME_CERTS\" "; + while (my ($env_key, $env_val) = each %ENV) { if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) { @@ -940,7 +949,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) } } else { # Non-docker run - $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 "; + $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 "; $command .= $stdbuf; $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"}; } @@ -1132,7 +1141,9 @@ sub update_progress_stats sub reapchildren { my $children_reaped = 0; - while ((my $pid = waitpid (-1, WNOHANG)) > 0) + my @successful_task_uuids = (); + + while((my $pid = waitpid (-1, WNOHANG)) > 0) { my $childstatus = $?; @@ -1141,13 +1152,6 @@ sub reapchildren . $slot[$proc{$pid}->{slot}]->{cpu}); my $jobstepidx = $proc{$pid}->{jobstepidx}; - if (!WIFEXITED($childstatus)) - { - # child did not exit (may be temporarily stopped) - Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now."); - next; - } - $children_reaped++; my $elapsed = time - $proc{$pid}->{time}; my $Jobstep = $jobstep[$jobstepidx]; @@ -1205,8 +1209,9 @@ sub reapchildren push @jobstep_todo, $jobstepidx; $Job->{'tasks_summary'}->{'failed'}++; } - else + else # task_success { + push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid}; ++$thisround_succeeded; $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0; $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0; @@ -1229,34 +1234,36 @@ sub reapchildren push @freeslot, $proc{$pid}->{slot}; delete $proc{$pid}; - if ($task_success) { - # Load new tasks - my $newtask_list = []; - my $newtask_results; - do { - $newtask_results = api_call( - "job_tasks/list", - 'where' => { - 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid} - }, - 'order' => 'qsequence', - 'offset' => scalar(@$newtask_list), - ); - push(@$newtask_list, @{$newtask_results->{items}}); - } while (@{$newtask_results->{items}}); - foreach my $arvados_task (@$newtask_list) { - my $jobstep = { - 'level' => $arvados_task->{'sequence'}, - 'failures' => 0, - 'arvados_task' => $arvados_task - }; - push @jobstep, $jobstep; - push @jobstep_todo, $#jobstep; - } - } $progress_is_dirty = 1; } + if (scalar(@successful_task_uuids) > 0) + { + Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids))); + # Load new tasks + my $newtask_list = []; + my $newtask_results; + do { + $newtask_results = api_call( + "job_tasks/list", + 'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]], + 'order' => 'qsequence', + 'offset' => scalar(@$newtask_list), + ); + push(@$newtask_list, @{$newtask_results->{items}}); + } while (@{$newtask_results->{items}}); + Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list))); + foreach my $arvados_task (@$newtask_list) { + my $jobstep = { + 'level' => $arvados_task->{'sequence'}, + 'failures' => 0, + 'arvados_task' => $arvados_task + }; + push @jobstep, $jobstep; + push @jobstep_todo, $#jobstep; + } + } + return $children_reaped; } @@ -1466,7 +1473,6 @@ sub preprocess_stderr # attached to a particular worker slot. 
     } elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
-      my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
       my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
       $slot[$job_slot_index]->{node}->{fail_count}++;
       $jobstep[$jobstepidx]->{tempfail} = 1;
@@ -1476,7 +1482,7 @@
       $jobstep[$jobstepidx]->{tempfail} = 1;
       ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
     }
-    elsif ($line =~ /arvados\.errors\.Keep/) {
+    elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
       $jobstep[$jobstepidx]->{tempfail} = 1;
     }
   }
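For reference, a minimal standalone sketch (illustrative only, not part of the patch) of the pattern the new option relies on: parse --cgroup-root with Getopt::Long, default it to /sys/fs/cgroup, and interpolate it with \Q...\E so any shell metacharacters in the path are escaped before the value is spliced into the crunchstat command line.

    #!/usr/bin/perl
    # Illustrative sketch; the option name and default mirror the patch above.
    use strict;
    use warnings;
    use Getopt::Long;

    my $cgroup_root = "/sys/fs/cgroup";          # default used by crunch-job
    GetOptions('cgroup-root=s' => \$cgroup_root)
        or die "usage: $0 [--cgroup-root PATH]\n";

    # \Q...\E (quotemeta) backslash-escapes metacharacters so the value is
    # passed through the shell as a single literal argument.
    my $command = "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
    print "$command\n";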