some more refactoring and attention to coding standards
[arvados.git] / services / api / script / crunch-dispatch.rb
index 41fef386b4e7170279df9dd0cc9478dadce3b61e..9eb1c28cb2e3fa567a735f7b9556c57ba32e8f1f 100755 (executable)
@@ -145,16 +145,17 @@ class Dispatcher
             api_client_id: 0)
       job_auth.save
 
-      cmd_args << (ENV['CRUNCH_JOB_BIN'] || `which crunch-job`.strip)
+      crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
+      if crunch_job_bin == ''
+        raise "No CRUNCH_JOB_BIN env var, and arv-crunch-job not in path."
+      end
+
+      cmd_args << crunch_job_bin
       cmd_args << '--job-api-token'
       cmd_args << job_auth.api_token
       cmd_args << '--job'
       cmd_args << job.uuid
 
-      if cmd_args[0] == ''
-        raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
-      end
-
       commit = Commit.where(sha1: job.script_version).first
       if commit
         cmd_args << '--git-dir'
@@ -297,7 +298,6 @@ class Dispatcher
     job_done = j_done[:job]
     $stderr.puts "dispatch: child #{pid_done} exit"
     $stderr.puts "dispatch: job #{job_done.uuid} end"
-    $redis.publish job_done.uuid, "end"
 
     # Ensure every last drop of stdout and stderr is consumed
     read_pipes
@@ -308,9 +308,18 @@ class Dispatcher
     # Wait the thread
     j_done[:wait_thr].value
 
+    jobrecord = Job.find_by_uuid(job_done.uuid)
+    jobrecord.running = false
+    jobrecord.finished_at ||= Time.now
+    # Don't set 'jobrecord.success = false' because if the job failed to run due to an
+    # issue with crunch-job or slurm, we want the job to stay in the queue.
+    jobrecord.save!
+
     # Invalidate the per-job auth token
     j_done[:job_auth].update_attributes expires_at: Time.now
 
+    $redis.publish job_done.uuid, "end"
+
     @running.delete job_done.uuid
   end
 
@@ -359,8 +368,6 @@ class Dispatcher
     end
   end
 
-
-
   protected
 
   def did_recently(thing, min_interval)