2955: Renamed script to "cancel_stale_jobs". Fixed spelling canceled_at -> cancelled_at.
[arvados.git] / services / api / script / cancel_stale_jobs.rb
#!/usr/bin/env ruby

# Optional single-instance guard. When CRUNCH_DISPATCH_LOCKFILE is set, the
# named file is opened (created if missing) and an exclusive, non-blocking
# lock is requested on it; if the lock cannot be obtained the script aborts
# immediately. The variable is removed from ENV as it is read.
if ENV["CRUNCH_DISPATCH_LOCKFILE"]
  lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
  # The handle is kept in a top-level local and never closed here.
  # NOTE(review): presumably so the lock stays held until the process
  # exits -- confirm nothing later in the run closes it.
  lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
  unless lockfile.flock File::LOCK_EX|File::LOCK_NB
    abort "Lock unavailable on #{lockfilename} - exit"
  end
end

# Choose the Rails environment: the first command-line argument wins, then
# an inherited RAILS_ENV, and finally "development".
ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
12
# Load the application's boot and environment files, located in ../config
# relative to this script. This happens after RAILS_ENV has been assigned.
require File.dirname(__FILE__) + '/../config/boot'
require File.dirname(__FILE__) + '/../config/environment'
15
# Fail every job that is still flagged as running but has stopped logging.
#
# For each record yielded by Job.running, the newest Log row whose
# object_uuid equals the job's uuid is looked up. When that row is older
# than max_log_age seconds, a notice is written to $stderr and the job is
# updated: running becomes false, cancelled_at and finished_at are filled in
# only if they are still unset, success becomes false only if it is still
# nil, and the record is persisted with save! (so a failed save raises and
# ends the sweep).
#
# Jobs that have no Log rows at all are left untouched.
#
# max_log_age - Seconds of log silence tolerated before a job is failed.
#               Defaults to 300 (five minutes).
#
# Returns whatever Job.running.each returns; callers in this script ignore it.
def cancel_stale_jobs(max_log_age = 300)
  Job.running.each do |jobrecord|
    newest_log = Log.where("object_uuid=?", jobrecord.uuid).limit(1).order("created_at desc").first
    next unless newest_log

    # Read the clock once per job so the reported age and both timestamps
    # written below are consistent with each other.
    now = Time.now
    age = now - newest_log.created_at
    next unless age > max_log_age

    $stderr.puts "dispatch: failing orphan job #{jobrecord.uuid}, last log is #{age} seconds old"
    # The job is marked running but has produced no log entries within the
    # allowed window. Per the original author's note it is presumed not to
    # be known to crunch-dispatcher, so it is marked as failed.
    jobrecord.running = false
    jobrecord.cancelled_at ||= now
    jobrecord.finished_at ||= now
    jobrecord.success = false if jobrecord.success.nil?
    jobrecord.save!
  end
end
36
# Script entry point: run one sweep with the default threshold.
cancel_stale_jobs