From a46f0152c44fe20eba4db38858eaa2f99bae83f2 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 5 Jun 2014 12:43:09 -0400
Subject: [PATCH] 2955: Moved logic to clean jobs table from crunch-dispatch into a standalone script.

---
 Review notes (free-form text here, before the first "diff --git" line, is
 not part of the commit message or of the diff):

 * What the patch does: removes Dispatcher#refresh_running (and its
   once-per-60-seconds call in the main loop) from crunch-dispatch.rb and
   adds script/clean_orphan_jobs.rb, which runs an equivalent sweep once
   and exits.

 * Locking: when CRUNCH_DISPATCH_LOCKFILE is set, the new script takes a
   non-blocking exclusive flock on that file and aborts if it cannot.
   Presumably this is the same lock crunch-dispatch holds, so the sweep
   only runs while the dispatcher is down -- confirm against
   crunch-dispatch.rb. When the variable is unset no lock is taken.

 * Behaviour differences versus the removed method:
   - the "!@running[jobrecord.uuid]" guard is gone, so every Job.running
     record whose newest Log row is older than 300 seconds is marked
     failed; the carried-over in-code comment ("not known to
     crunch-dispatcher") no longer describes a check the code performs;
   - a job with no Log rows at all is now skipped ("if f"); the removed
     code called f.created_at unconditionally;
   - the timestamp attribute written is "cancelled_at" in the new script
     and "canceled_at" in the removed code. Which spelling the Job model
     defines is not visible here -- verify.
   - the stderr message keeps the "dispatch:" prefix.

 * Whitespace: leading indentation on the context and removed lines of
   crunch-dispatch.rb below was re-derived using 2-space Ruby indentation.
   If the hunks do not match the target file, apply with
   "git apply --ignore-whitespace".

 services/api/script/clean_orphan_jobs.rb | 37 ++++++++++++++++++++++++
 services/api/script/crunch-dispatch.rb   | 22 --------------
 2 files changed, 37 insertions(+), 22 deletions(-)
 create mode 100755 services/api/script/clean_orphan_jobs.rb

diff --git a/services/api/script/clean_orphan_jobs.rb b/services/api/script/clean_orphan_jobs.rb
new file mode 100755
index 0000000000..35673c3ccb
--- /dev/null
+++ b/services/api/script/clean_orphan_jobs.rb
@@ -0,0 +1,37 @@
+#!/usr/bin/env ruby
+
+if ENV["CRUNCH_DISPATCH_LOCKFILE"]
+  lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
+  lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
+  unless lockfile.flock File::LOCK_EX|File::LOCK_NB
+    abort "Lock unavailable on #{lockfilename} - exit"
+  end
+end
+
+ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
+
+require File.dirname(__FILE__) + '/../config/boot'
+require File.dirname(__FILE__) + '/../config/environment'
+
+def refresh_running
+  Job.running.each do |jobrecord|
+    f = Log.where("object_uuid=?", jobrecord.uuid).limit(1).order("created_at desc").first
+    if f
+      age = (Time.now - f.created_at)
+      if age > 300
+        $stderr.puts "dispatch: failing orphan job #{jobrecord.uuid}, last log is #{age} seconds old"
+        # job is marked running, but not known to crunch-dispatcher, and
+        # hasn't produced any log entries for 5 minutes, so mark it as failed.
+        jobrecord.running = false
+        jobrecord.cancelled_at ||= Time.now
+        jobrecord.finished_at ||= Time.now
+        if jobrecord.success.nil?
+          jobrecord.success = false
+        end
+        jobrecord.save!
+      end
+    end
+  end
+end
+
+refresh_running
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 9a8280fac5..87acb651a4 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -41,27 +41,6 @@ class Dispatcher
     return act_as_system_user
   end
 
-  def refresh_running
-    Job.running.each do |jobrecord|
-      if !@running[jobrecord.uuid]
-        f = Log.where("object_uuid=?", jobrecord.uuid).limit(1).order("created_at desc").first
-        age = (Time.now - f.created_at)
-        if age > 300
-          $stderr.puts "dispatch: failing orphan job #{jobrecord.uuid}, last log is #{age} seconds old"
-          # job is marked running, but not known to crunch-dispatcher, and
-          # hasn't produced any log entries for 5 minutes, so mark it as failed.
-          jobrecord.running = false
-          jobrecord.canceled_at ||= Time.now
-          jobrecord.finished_at ||= Time.now
-          if jobrecord.success.nil?
-            jobrecord.success = false
-          end
-          jobrecord.save!
-        end
-      end
-    end
-  end
-
   def refresh_todo
     @todo = Job.queue.select do |j| j.repository end
     @todo_pipelines = PipelineInstance.queue
@@ -409,7 +388,6 @@ class Dispatcher
           end
         end
       else
-        refresh_running unless did_recently(:refresh_running, 60.0)
         refresh_todo unless did_recently(:refresh_todo, 1.0)
         update_node_status
         unless @todo.empty? or did_recently(:start_jobs, 1.0) or $signal[:term]
-- 
2.30.2