From e6e94ca43f788b28d61c03f81b15ab5647f22d4c Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 13 May 2014 14:45:42 -0400 Subject: [PATCH] Fix job state cleanup in crunch-job failure case. --- services/api/script/crunch-dispatch.rb | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb index f15258d420..43a527afac 100755 --- a/services/api/script/crunch-dispatch.rb +++ b/services/api/script/crunch-dispatch.rb @@ -314,11 +314,21 @@ class Dispatcher j_done[:wait_thr].value jobrecord = Job.find_by_uuid(job_done.uuid) - jobrecord.running = false - jobrecord.finished_at ||= Time.now - # Don't set 'jobrecord.success = false' because if the job failed to run due to an - # issue with crunch-job or slurm, we want the job to stay in the queue. - jobrecord.save! + if jobrecord.started_at + # Clean up state fields in case crunch-job exited without + # putting the job in a suitable "finished" state. + jobrecord.running = false + jobrecord.finished_at ||= Time.now + if jobrecord.success.nil? + jobrecord.success = false + end + jobrecord.save! + else + # Don't fail the job if crunch-job didn't even get as far as + # starting it. If the job failed to run due to an infrastructure + # issue with crunch-job or slurm, we want the job to stay in the + # queue. + end # Invalidate the per-job auth token j_done[:job_auth].update_attributes expires_at: Time.now -- 2.30.2