X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/57ee96926d2d3d315ece3b2cbfb20d0fe01ab912..7aea6d3d35bc622818dbb62929a7f7fc75a4aeaf:/services/api/script/crunch-dispatch.rb

diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 154fcf3145..bb7ce7e12d 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -2,6 +2,23 @@
 
 include Process
 
+$options = {}
+(ARGV.any? ? ARGV : ['--jobs', '--pipelines']).each do |arg|
+  case arg
+  when '--jobs'
+    $options[:jobs] = true
+  when '--pipelines'
+    $options[:pipelines] = true
+  else
+    abort "Unrecognized command line option '#{arg}'"
+  end
+end
+if not ($options[:jobs] or $options[:pipelines])
+  abort "Nothing to do. Please specify at least one of: --jobs, --pipelines."
+end
+
+ARGV.reject! { |a| a =~ /--jobs|--pipelines/ }
+
 $warned = {}
 $signal = {}
 %w{TERM INT}.each do |sig|
@@ -34,8 +51,14 @@ class Dispatcher
   end
 
   def refresh_todo
-    @todo = Job.queue.select do |j| j.repository end
-    @todo_pipelines = PipelineInstance.queue
+    @todo = []
+    if $options[:jobs]
+      @todo = Job.queue.select(&:repository)
+    end
+    @todo_pipelines = []
+    if $options[:pipelines]
+      @todo_pipelines = PipelineInstance.queue
+    end
   end
 
   def sinfo
@@ -68,25 +91,28 @@ class Dispatcher
       begin
         sinfo.split("\n").
           each do |line|
-          re = line.match /(\S+?):+(idle|alloc|down)/
+          re = line.match /(\S+?):+(idle|alloc|down)?/
           next if !re
 
+          _, node_name, node_state = *re
+          node_state = 'down' unless %w(idle alloc down).include? node_state
+
           # sinfo tells us about a node N times if it is shared by N partitions
-          next if node_seen[re[1]]
-          node_seen[re[1]] = true
+          next if node_seen[node_name]
+          node_seen[node_name] = true
 
           # update our database (and cache) when a node's state changes
-          if @node_state[re[1]] != re[2]
-            @node_state[re[1]] = re[2]
-            node = Node.where('hostname=?', re[1]).order(:last_ping_at).last
+          if @node_state[node_name] != node_state
+            @node_state[node_name] = node_state
+            node = Node.where('hostname=?', node_name).order(:last_ping_at).last
             if node
-              $stderr.puts "dispatch: update #{re[1]} state to #{re[2]}"
-              node.info['slurm_state'] = re[2]
+              $stderr.puts "dispatch: update #{node_name} state to #{node_state}"
+              node.info['slurm_state'] = node_state
               if not node.save
                 $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
               end
-            elsif re[2] != 'down'
-              $stderr.puts "dispatch: sinfo reports '#{re[1]}' is not down, but no node has that name"
+            elsif node_state != 'down'
+              $stderr.puts "dispatch: sinfo reports '#{node_name}' is not down, but no node has that name"
             end
           end
         end