X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/8e1efc4c74f86a986dd0c6f15f53777d0d2bf0bb..035b113f60302f6d9c265e6e3a63dbb3c5873153:/services/api/lib/crunch_dispatch.rb diff --git a/services/api/lib/crunch_dispatch.rb b/services/api/lib/crunch_dispatch.rb index 3ef1803031..3cabc1e3ce 100644 --- a/services/api/lib/crunch_dispatch.rb +++ b/services/api/lib/crunch_dispatch.rb @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + require 'open3' require 'shellwords' @@ -425,8 +429,11 @@ class CrunchDispatch i, o, e, t = Open3.popen3(*cmd_args) rescue $stderr.puts "dispatch: popen3: #{$!}" - sleep 1 - next + # This is a dispatch problem like "Too many open files"; + # retrying another job right away would be futile. Just return + # and hope things are better next time, after (at least) a + # did_recently() delay. + return end $stderr.puts "dispatch: job #{job.uuid}" @@ -629,31 +636,11 @@ class CrunchDispatch pid_done = nil j_done = nil - if false - begin - pid_done = waitpid(-1, Process::WNOHANG | Process::WUNTRACED) - if pid_done - j_done = @running.values. - select { |j| j[:wait_thr].pid == pid_done }. - first - end - rescue SystemCallError - # I have @running processes but system reports I have no - # children. This is likely to happen repeatedly if it happens at - # all; I will log this no more than once per child process I - # start. - if 0 < @running.select { |uuid,j| j[:warned_waitpid_error].nil? }.size - children = @running.values.collect { |j| j[:wait_thr].pid }.join ' ' - $stderr.puts "dispatch: IPC bug: waitpid() error (#{$!}), but I have children #{children}" - end - @running.each do |uuid,j| j[:warned_waitpid_error] = true end - end - else - @running.each do |uuid, j| - if j[:wait_thr].status == false - pid_done = j[:wait_thr].pid - j_done = j - end + @running.each do |uuid, j| + if !j[:wait_thr].status + pid_done = j[:wait_thr].pid + j_done = j + break end end